FreeBSD/Linux Kernel Cross Reference
sys/mm/compaction.c

    1 /*
    2  * linux/mm/compaction.c
    3  *
    4  * Memory compaction for the reduction of external fragmentation. Note that
    5  * this heavily depends upon page migration to do all the real heavy
    6  * lifting
    7  *
    8  * Copyright IBM Corp. 2007-2010 Mel Gorman <mel@csn.ul.ie>
    9  */
   10 #include <linux/swap.h>
   11 #include <linux/migrate.h>
   12 #include <linux/compaction.h>
   13 #include <linux/mm_inline.h>
   14 #include <linux/backing-dev.h>
   15 #include <linux/sysctl.h>
   16 #include <linux/sysfs.h>
   17 #include <linux/balloon_compaction.h>
   18 #include "internal.h"
   19 
   20 #ifdef CONFIG_COMPACTION
   21 static inline void count_compact_event(enum vm_event_item item)
   22 {
   23         count_vm_event(item);
   24 }
   25 
   26 static inline void count_compact_events(enum vm_event_item item, long delta)
   27 {
   28         count_vm_events(item, delta);
   29 }
   30 #else
   31 #define count_compact_event(item) do { } while (0)
   32 #define count_compact_events(item, delta) do { } while (0)
   33 #endif
   34 
   35 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
   36 
   37 #define CREATE_TRACE_POINTS
   38 #include <trace/events/compaction.h>
   39 
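      /* Return isolated free pages to the page allocator, counting how many were freed. */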
   40 static unsigned long release_freepages(struct list_head *freelist)
   41 {
   42         struct page *page, *next;
   43         unsigned long count = 0;
   44 
   45         list_for_each_entry_safe(page, next, freelist, lru) {
   46                 list_del(&page->lru);
   47                 __free_page(page);
   48                 count++;
   49         }
   50 
   51         return count;
   52 }
   53 
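      /* split_free_page() does not map the pages; run the arch/debug allocation hooks for each page on the list. */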
   54 static void map_pages(struct list_head *list)
   55 {
   56         struct page *page;
   57 
   58         list_for_each_entry(page, list, lru) {
   59                 arch_alloc_page(page, 0);
   60                 kernel_map_pages(page, 1, 1);
   61         }
   62 }
   63 
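      /* Returns true if the pageblock migratetype is MOVABLE or CMA. */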
   64 static inline bool migrate_async_suitable(int migratetype)
   65 {
   66         return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
   67 }
   68 
   69 #ifdef CONFIG_COMPACTION
   70 /* Returns true if the pageblock should be scanned for pages to isolate. */
   71 static inline bool isolation_suitable(struct compact_control *cc,
   72                                         struct page *page)
   73 {
   74         if (cc->ignore_skip_hint)
   75                 return true;
   76 
   77         return !get_pageblock_skip(page);
   78 }
   79 
   80 /*
   81  * This function is called to clear all cached information on pageblocks that
   82  * should be skipped for page isolation when the migrate and free page scanner
   83  * meet.
   84  */
   85 static void __reset_isolation_suitable(struct zone *zone)
   86 {
   87         unsigned long start_pfn = zone->zone_start_pfn;
   88         unsigned long end_pfn = zone->zone_start_pfn + zone->spanned_pages;
   89         unsigned long pfn;
   90 
   91         zone->compact_cached_migrate_pfn = start_pfn;
   92         zone->compact_cached_free_pfn = end_pfn;
   93         zone->compact_blockskip_flush = false;
   94 
   95         /* Walk the zone and mark every pageblock as suitable for isolation */
   96         for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) {
   97                 struct page *page;
   98 
   99                 cond_resched();
  100 
  101                 if (!pfn_valid(pfn))
  102                         continue;
  103 
  104                 page = pfn_to_page(pfn);
  105                 if (zone != page_zone(page))
  106                         continue;
  107 
  108                 clear_pageblock_skip(page);
  109         }
  110 }
  111 
  112 void reset_isolation_suitable(pg_data_t *pgdat)
  113 {
  114         int zoneid;
  115 
  116         for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
  117                 struct zone *zone = &pgdat->node_zones[zoneid];
  118                 if (!populated_zone(zone))
  119                         continue;
  120 
  121                 /* Only flush if a full compaction finished recently */
  122                 if (zone->compact_blockskip_flush)
  123                         __reset_isolation_suitable(zone);
  124         }
  125 }
  126 
  127 /*
  128  * If no pages were isolated then mark this pageblock to be skipped in the
  129  * future. The information is later cleared by __reset_isolation_suitable().
  130  */
  131 static void update_pageblock_skip(struct compact_control *cc,
  132                         struct page *page, unsigned long nr_isolated,
  133                         bool migrate_scanner)
  134 {
  135         struct zone *zone = cc->zone;
  136         if (!page)
  137                 return;
  138 
  139         if (!nr_isolated) {
  140                 unsigned long pfn = page_to_pfn(page);
  141                 set_pageblock_skip(page);
  142 
  143                 /* Update where compaction should restart */
  144                 if (migrate_scanner) {
  145                         if (!cc->finished_update_migrate &&
  146                             pfn > zone->compact_cached_migrate_pfn)
  147                                 zone->compact_cached_migrate_pfn = pfn;
  148                 } else {
  149                         if (!cc->finished_update_free &&
  150                             pfn < zone->compact_cached_free_pfn)
  151                                 zone->compact_cached_free_pfn = pfn;
  152                 }
  153         }
  154 }
  155 #else
  156 static inline bool isolation_suitable(struct compact_control *cc,
  157                                         struct page *page)
  158 {
  159         return true;
  160 }
  161 
  162 static void update_pageblock_skip(struct compact_control *cc,
  163                         struct page *page, unsigned long nr_isolated,
  164                         bool migrate_scanner)
  165 {
  166 }
  167 #endif /* CONFIG_COMPACTION */
  168 
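      /* True if the caller should drop @lock: a reschedule is due or the lock is contended. */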
  169 static inline bool should_release_lock(spinlock_t *lock)
  170 {
  171         return need_resched() || spin_is_contended(lock);
  172 }
  173 
  174 /*
  175  * Compaction requires the taking of some coarse locks that are potentially
  176  * very heavily contended. Check if the process needs to be scheduled or
  177  * if the lock is contended. For async compaction, back out if contention
  178  * is severe. For sync compaction, schedule.
  179  *
  180  * Returns true if the lock is held.
  181  * Returns false if the lock is released and compaction should abort.
  182  */
  183 static bool compact_checklock_irqsave(spinlock_t *lock, unsigned long *flags,
  184                                       bool locked, struct compact_control *cc)
  185 {
  186         if (should_release_lock(lock)) {
  187                 if (locked) {
  188                         spin_unlock_irqrestore(lock, *flags);
  189                         locked = false;
  190                 }
  191 
  192                 /* async aborts if taking too long or contended */
  193                 if (!cc->sync) {
  194                         cc->contended = true;
  195                         return false;
  196                 }
  197 
  198                 cond_resched();
  199         }
  200 
  201         if (!locked)
  202                 spin_lock_irqsave(lock, *flags);
  203         return true;
  204 }
  205 
  206 static inline bool compact_trylock_irqsave(spinlock_t *lock,
  207                         unsigned long *flags, struct compact_control *cc)
  208 {
  209         return compact_checklock_irqsave(lock, flags, false, cc);
  210 }
  211 
  212 /* Returns true if the page is within a block suitable for migration to */
  213 static bool suitable_migration_target(struct page *page)
  214 {
  215         int migratetype = get_pageblock_migratetype(page);
  216 
  217         /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
  218         if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
  219                 return false;
  220 
  221         /* If the page is a large free page, then allow migration */
  222         if (PageBuddy(page) && page_order(page) >= pageblock_order)
  223                 return true;
  224 
  225         /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
  226         if (migrate_async_suitable(migratetype))
  227                 return true;
  228 
  229         /* Otherwise skip the block */
  230         return false;
  231 }
  232 
  233 /*
  234  * Isolate free pages onto a private freelist. Caller must hold zone->lock.
  235  * If @strict is true, will abort returning 0 on any invalid PFNs or non-free
  236  * pages inside of the pageblock (even though it may still end up isolating
  237  * some pages).
  238  */
  239 static unsigned long isolate_freepages_block(struct compact_control *cc,
  240                                 unsigned long blockpfn,
  241                                 unsigned long end_pfn,
  242                                 struct list_head *freelist,
  243                                 bool strict)
  244 {
  245         int nr_scanned = 0, total_isolated = 0;
  246         struct page *cursor, *valid_page = NULL;
  247         unsigned long nr_strict_required = end_pfn - blockpfn;
  248         unsigned long flags;
  249         bool locked = false;
  250 
  251         cursor = pfn_to_page(blockpfn);
  252 
  253         /* Isolate free pages. */
  254         for (; blockpfn < end_pfn; blockpfn++, cursor++) {
  255                 int isolated, i;
  256                 struct page *page = cursor;
  257 
  258                 nr_scanned++;
  259                 if (!pfn_valid_within(blockpfn))
  260                         continue;
  261                 if (!valid_page)
  262                         valid_page = page;
  263                 if (!PageBuddy(page))
  264                         continue;
  265 
  266                 /*
  267                  * The zone lock must be held to isolate freepages.
  268                  * Unfortunately this is a very coarse lock and can be
  269                  * heavily contended if there are parallel allocations
  270                  * or parallel compactions. For async compaction we do not
  271                  * spin on the lock, and in all cases we acquire the lock
  272                  * as late as possible.
  273                  */
  274                 locked = compact_checklock_irqsave(&cc->zone->lock, &flags,
  275                                                                 locked, cc);
  276                 if (!locked)
  277                         break;
  278 
  279                 /* Recheck this is a suitable migration target under lock */
  280                 if (!strict && !suitable_migration_target(page))
  281                         break;
  282 
  283                 /* Recheck this is a buddy page under lock */
  284                 if (!PageBuddy(page))
  285                         continue;
  286 
  287                 /* Found a free page, break it into order-0 pages */
  288                 isolated = split_free_page(page);
  289                 if (!isolated && strict)
  290                         break;
  291                 total_isolated += isolated;
  292                 for (i = 0; i < isolated; i++) {
  293                         list_add(&page->lru, freelist);
  294                         page++;
  295                 }
  296 
  297                 /* If a page was split, advance to the end of it */
  298                 if (isolated) {
  299                         blockpfn += isolated - 1;
  300                         cursor += isolated - 1;
  301                 }
  302         }
  303 
  304         trace_mm_compaction_isolate_freepages(nr_scanned, total_isolated);
  305 
  306         /*
  307          * If strict isolation is requested by CMA then check that all the
  308          * pages requested were isolated. If there were any failures, 0 is
  309          * returned and CMA will fail.
  310          */
  311         if (strict && nr_strict_required > total_isolated)
  312                 total_isolated = 0;
  313 
  314         if (locked)
  315                 spin_unlock_irqrestore(&cc->zone->lock, flags);
  316 
  317         /* Update the pageblock-skip if the whole pageblock was scanned */
  318         if (blockpfn == end_pfn)
  319                 update_pageblock_skip(cc, valid_page, total_isolated, false);
  320 
  321         count_compact_events(COMPACTFREE_SCANNED, nr_scanned);
  322         if (total_isolated)
  323                 count_compact_events(COMPACTISOLATED, total_isolated);
  324         return total_isolated;
  325 }
  326 
  327 /**
  328  * isolate_freepages_range() - isolate free pages.
  329  * @start_pfn: The first PFN to start isolating.
  330  * @end_pfn:   The one-past-last PFN.
  331  *
  332  * Non-free pages, invalid PFNs, or zone boundaries within the
  333  * [start_pfn, end_pfn) range are considered errors and cause the function
  334  * to undo its actions and return zero.
  335  *
  336  * Otherwise, the function returns the one-past-the-last PFN of the isolated
  337  * pages (which may be greater than end_pfn if the end fell in the middle
  338  * of a free page).
  339  */
  340 unsigned long
  341 isolate_freepages_range(struct compact_control *cc,
  342                         unsigned long start_pfn, unsigned long end_pfn)
  343 {
  344         unsigned long isolated, pfn, block_end_pfn;
  345         LIST_HEAD(freelist);
  346 
  347         for (pfn = start_pfn; pfn < end_pfn; pfn += isolated) {
  348                 if (!pfn_valid(pfn) || cc->zone != page_zone(pfn_to_page(pfn)))
  349                         break;
  350 
  351                 /*
  352                  * On subsequent iterations ALIGN() is actually not needed,
  353                  * but we keep it so as not to complicate the code.
  354                  */
  355                 block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
  356                 block_end_pfn = min(block_end_pfn, end_pfn);
  357 
  358                 isolated = isolate_freepages_block(cc, pfn, block_end_pfn,
  359                                                    &freelist, true);
  360 
  361                 /*
  362                  * In strict mode, isolate_freepages_block() returns 0 if
  363                  * there are any holes in the block (ie. invalid PFNs or
  364                  * non-free pages).
  365                  */
  366                 if (!isolated)
  367                         break;
  368 
  369                 /*
  370                  * If we managed to isolate pages, it is always (1 << n) *
  371                  * pageblock_nr_pages for some non-negative n.  (Max order
  372                  * page may span two pageblocks).
  373                  */
  374         }
  375 
  376         /* split_free_page does not map the pages */
  377         map_pages(&freelist);
  378 
  379         if (pfn < end_pfn) {
  380                 /* Loop terminated early, cleanup. */
  381                 release_freepages(&freelist);
  382                 return 0;
  383         }
  384 
  385         /* We don't use freelists for anything. */
  386         return pfn;
  387 }
  388 
  389 /* Update the number of anon and file isolated pages in the zone */
  390 static void acct_isolated(struct zone *zone, bool locked, struct compact_control *cc)
  391 {
  392         struct page *page;
  393         unsigned int count[2] = { 0, };
  394 
  395         list_for_each_entry(page, &cc->migratepages, lru)
  396                 count[!!page_is_file_cache(page)]++;
  397 
  398         /* If locked we can use the interrupt unsafe versions */
  399         if (locked) {
  400                 __mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
  401                 __mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
  402         } else {
  403                 mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
  404                 mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
  405         }
  406 }
  407 
  408 /* Similar to reclaim, but different enough that they don't share logic */
  409 static bool too_many_isolated(struct zone *zone)
  410 {
  411         unsigned long active, inactive, isolated;
  412 
  413         inactive = zone_page_state(zone, NR_INACTIVE_FILE) +
  414                                         zone_page_state(zone, NR_INACTIVE_ANON);
  415         active = zone_page_state(zone, NR_ACTIVE_FILE) +
  416                                         zone_page_state(zone, NR_ACTIVE_ANON);
  417         isolated = zone_page_state(zone, NR_ISOLATED_FILE) +
  418                                         zone_page_state(zone, NR_ISOLATED_ANON);
  419 
  420         return isolated > (inactive + active) / 2;
  421 }
  422 
  423 /**
  424  * isolate_migratepages_range() - isolate all migrate-able pages in range.
  425  * @zone:       Zone pages are in.
  426  * @cc:         Compaction control structure.
  427  * @low_pfn:    The first PFN of the range.
  428  * @end_pfn:    The one-past-the-last PFN of the range.
  429  * @unevictable: true if isolating unevictable pages is allowed
  430  *
  431  * Isolate all pages that can be migrated from the range specified by
  432  * [low_pfn, end_pfn).  Returns zero if there is a fatal signal
  433  * pending, otherwise the PFN of the first page that was not scanned
  434  * (which may be less than, equal to, or greater than end_pfn).
  435  *
  436  * Assumes that cc->migratepages is empty and cc->nr_migratepages is
  437  * zero.
  438  *
  439  * Apart from cc->migratepages and cc->nr_migratepages this function
  440  * does not modify any cc's fields, in particular it does not modify
  441  * (or read for that matter) cc->migrate_pfn.
  442  */
  443 unsigned long
  444 isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
  445                 unsigned long low_pfn, unsigned long end_pfn, bool unevictable)
  446 {
  447         unsigned long last_pageblock_nr = 0, pageblock_nr;
  448         unsigned long nr_scanned = 0, nr_isolated = 0;
  449         struct list_head *migratelist = &cc->migratepages;
  450         isolate_mode_t mode = 0;
  451         struct lruvec *lruvec;
  452         unsigned long flags;
  453         bool locked = false;
  454         struct page *page = NULL, *valid_page = NULL;
  455 
  456         /*
  457          * Ensure that there are not too many pages isolated from the LRU
  458          * list by either parallel reclaimers or compaction. If there are,
  459          * delay for some time until fewer pages are isolated
  460          */
  461         while (unlikely(too_many_isolated(zone))) {
  462                 /* async migration should just abort */
  463                 if (!cc->sync)
  464                         return 0;
  465 
  466                 congestion_wait(BLK_RW_ASYNC, HZ/10);
  467 
  468                 if (fatal_signal_pending(current))
  469                         return 0;
  470         }
  471 
  472         /* Time to isolate some pages for migration */
  473         cond_resched();
  474         for (; low_pfn < end_pfn; low_pfn++) {
  475                 /* give a chance to irqs before checking need_resched() */
  476                 if (locked && !((low_pfn+1) % SWAP_CLUSTER_MAX)) {
  477                         if (should_release_lock(&zone->lru_lock)) {
  478                                 spin_unlock_irqrestore(&zone->lru_lock, flags);
  479                                 locked = false;
  480                         }
  481                 }
  482 
  483                 /*
  484                  * migrate_pfn does not necessarily start aligned to a
  485                  * pageblock. Ensure that pfn_valid is called when moving
  486                  * into a new MAX_ORDER_NR_PAGES range in case of large
  487                  * memory holes within the zone
  488                  */
  489                 if ((low_pfn & (MAX_ORDER_NR_PAGES - 1)) == 0) {
  490                         if (!pfn_valid(low_pfn)) {
  491                                 low_pfn += MAX_ORDER_NR_PAGES - 1;
  492                                 continue;
  493                         }
  494                 }
  495 
  496                 if (!pfn_valid_within(low_pfn))
  497                         continue;
  498                 nr_scanned++;
  499 
  500                 /*
  501                  * Get the page and ensure the page is within the same zone.
  502                  * See the comment in isolate_freepages about overlapping
  503                  * nodes. It is deliberate that the new zone lock is not taken
  504                  * as memory compaction should not move pages between nodes.
  505                  */
  506                 page = pfn_to_page(low_pfn);
  507                 if (page_zone(page) != zone)
  508                         continue;
  509 
  510                 if (!valid_page)
  511                         valid_page = page;
  512 
  513                 /* If isolation recently failed, do not retry */
  514                 pageblock_nr = low_pfn >> pageblock_order;
  515                 if (!isolation_suitable(cc, page))
  516                         goto next_pageblock;
  517 
  518                 /* Skip if free */
  519                 if (PageBuddy(page))
  520                         continue;
  521 
  522                 /*
  523                  * For async migration, also only scan in MOVABLE blocks. Async
  524                  * migration is optimistic to see if the minimum amount of work
  525                  * satisfies the allocation
  526                  */
  527                 if (!cc->sync && last_pageblock_nr != pageblock_nr &&
  528                     !migrate_async_suitable(get_pageblock_migratetype(page))) {
  529                         cc->finished_update_migrate = true;
  530                         goto next_pageblock;
  531                 }
  532 
  533                 /*
  534                  * Check may be lockless but that's ok as we recheck later.
  535                  * It's possible to migrate LRU pages and balloon pages;
  536                  * skip any other type of page.
  537                  */
  538                 if (!PageLRU(page)) {
  539                         if (unlikely(balloon_page_movable(page))) {
  540                                 if (locked && balloon_page_isolate(page)) {
  541                                         /* Successfully isolated */
  542                                         cc->finished_update_migrate = true;
  543                                         list_add(&page->lru, migratelist);
  544                                         cc->nr_migratepages++;
  545                                         nr_isolated++;
  546                                         goto check_compact_cluster;
  547                                 }
  548                         }
  549                         continue;
  550                 }
  551 
  552                 /*
  553                  * PageLRU is set. lru_lock normally excludes isolation
  554                  * splitting and collapsing (collapsing has already happened
  555                  * if PageLRU is set) but the lock is not necessarily taken
  556                  * here and it is wasteful to take it just to check transhuge.
  557                  * Check TransHuge without lock and skip the whole pageblock if
  558                  * it's either a transhuge or hugetlbfs page, as calling
  559                  * compound_order() without preventing THP from splitting the
  560                  * page underneath us may return surprising results.
  561                  */
  562                 if (PageTransHuge(page)) {
  563                         if (!locked)
  564                                 goto next_pageblock;
  565                         low_pfn += (1 << compound_order(page)) - 1;
  566                         continue;
  567                 }
  568 
  569                 /* Check if it is ok to still hold the lock */
  570                 locked = compact_checklock_irqsave(&zone->lru_lock, &flags,
  571                                                                 locked, cc);
  572                 if (!locked || fatal_signal_pending(current))
  573                         break;
  574 
  575                 /* Recheck PageLRU and PageTransHuge under lock */
  576                 if (!PageLRU(page))
  577                         continue;
  578                 if (PageTransHuge(page)) {
  579                         low_pfn += (1 << compound_order(page)) - 1;
  580                         continue;
  581                 }
  582 
  583                 if (!cc->sync)
  584                         mode |= ISOLATE_ASYNC_MIGRATE;
  585 
  586                 if (unevictable)
  587                         mode |= ISOLATE_UNEVICTABLE;
  588 
  589                 lruvec = mem_cgroup_page_lruvec(page, zone);
  590 
  591                 /* Try isolate the page */
  592                 if (__isolate_lru_page(page, mode) != 0)
  593                         continue;
  594 
  595                 VM_BUG_ON(PageTransCompound(page));
  596 
  597                 /* Successfully isolated */
  598                 cc->finished_update_migrate = true;
  599                 del_page_from_lru_list(page, lruvec, page_lru(page));
  600                 list_add(&page->lru, migratelist);
  601                 cc->nr_migratepages++;
  602                 nr_isolated++;
  603 
  604 check_compact_cluster:
  605                 /* Avoid isolating too much */
  606                 if (cc->nr_migratepages == COMPACT_CLUSTER_MAX) {
  607                         ++low_pfn;
  608                         break;
  609                 }
  610 
  611                 continue;
  612 
  613 next_pageblock:
  614                 low_pfn += pageblock_nr_pages;
  615                 low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
  616                 last_pageblock_nr = pageblock_nr;
  617         }
  618 
  619         acct_isolated(zone, locked, cc);
  620 
  621         if (locked)
  622                 spin_unlock_irqrestore(&zone->lru_lock, flags);
  623 
  624         /* Update the pageblock-skip if the whole pageblock was scanned */
  625         if (low_pfn == end_pfn)
  626                 update_pageblock_skip(cc, valid_page, nr_isolated, true);
  627 
  628         trace_mm_compaction_isolate_migratepages(nr_scanned, nr_isolated);
  629 
  630         count_compact_events(COMPACTMIGRATE_SCANNED, nr_scanned);
  631         if (nr_isolated)
  632                 count_compact_events(COMPACTISOLATED, nr_isolated);
  633 
  634         return low_pfn;
  635 }
  636 
  637 #endif /* CONFIG_COMPACTION || CONFIG_CMA */
  638 #ifdef CONFIG_COMPACTION
  639 /*
  640  * Based on information in the current compact_control, find blocks
  641  * suitable for isolating free pages from and then isolate them.
  642  */
  643 static void isolate_freepages(struct zone *zone,
  644                                 struct compact_control *cc)
  645 {
  646         struct page *page;
  647         unsigned long high_pfn, low_pfn, pfn, zone_end_pfn, end_pfn;
  648         int nr_freepages = cc->nr_freepages;
  649         struct list_head *freelist = &cc->freepages;
  650 
  651         /*
  652          * Initialise the free scanner. The starting point is where we last
  653          * scanned from (or the end of the zone if starting). The low point
  654          * is the end of the pageblock the migration scanner is using.
  655          */
  656         pfn = cc->free_pfn;
  657         low_pfn = cc->migrate_pfn + pageblock_nr_pages;
  658 
  659         /*
  660          * Take care that if the migration scanner is at the end of the zone
  661          * that the free scanner does not accidentally move to the next zone
  662          * in the next isolation cycle.
  663          */
  664         high_pfn = min(low_pfn, pfn);
  665 
  666         zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
  667 
  668         /*
  669          * Isolate free pages until enough are available to migrate the
  670          * pages on cc->migratepages. We stop searching if the migrate
  671          * and free page scanners meet or enough free pages are isolated.
  672          */
  673         for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
  674                                         pfn -= pageblock_nr_pages) {
  675                 unsigned long isolated;
  676 
  677                 if (!pfn_valid(pfn))
  678                         continue;
  679 
  680                 /*
  681                  * Check for overlapping nodes/zones. It's possible on some
  682                  * configurations to have a setup like
  683                  * node0 node1 node0
  684                  * i.e. it's possible that all pages within a zones range of
  685                  * pages do not belong to a single zone.
  686                  */
  687                 page = pfn_to_page(pfn);
  688                 if (page_zone(page) != zone)
  689                         continue;
  690 
  691                 /* Check the block is suitable for migration */
  692                 if (!suitable_migration_target(page))
  693                         continue;
  694 
  695                 /* If isolation recently failed, do not retry */
  696                 if (!isolation_suitable(cc, page))
  697                         continue;
  698 
  699                 /* Found a block suitable for isolating free pages from */
  700                 isolated = 0;
  701 
  702                 /*
  703                  * As pfn may not start aligned, pfn+pageblock_nr_pages
  704                  * may cross a MAX_ORDER_NR_PAGES boundary and miss
  705                  * a pfn_valid check. Ensure isolate_freepages_block()
  706                  * only scans within a pageblock
  707                  */
  708                 end_pfn = ALIGN(pfn + 1, pageblock_nr_pages);
  709                 end_pfn = min(end_pfn, zone_end_pfn);
  710                 isolated = isolate_freepages_block(cc, pfn, end_pfn,
  711                                                    freelist, false);
  712                 nr_freepages += isolated;
  713 
  714                 /*
  715                  * Record the highest PFN we isolated pages from. When next
  716                  * looking for free pages, the search will restart here as
  717                  * page migration may have returned some pages to the allocator
  718                  */
  719                 if (isolated) {
  720                         cc->finished_update_free = true;
  721                         high_pfn = max(high_pfn, pfn);
  722                 }
  723         }
  724 
  725         /* split_free_page does not map the pages */
  726         map_pages(freelist);
  727 
  728         cc->free_pfn = high_pfn;
  729         cc->nr_freepages = nr_freepages;
  730 }
  731 
  732 /*
  733  * This is a migrate-callback that "allocates" freepages by taking pages
  734  * from the isolated freelists in the block we are migrating to.
  735  */
  736 static struct page *compaction_alloc(struct page *migratepage,
  737                                         unsigned long data,
  738                                         int **result)
  739 {
  740         struct compact_control *cc = (struct compact_control *)data;
  741         struct page *freepage;
  742 
  743         /* Isolate free pages if necessary */
  744         if (list_empty(&cc->freepages)) {
  745                 isolate_freepages(cc->zone, cc);
  746 
  747                 if (list_empty(&cc->freepages))
  748                         return NULL;
  749         }
  750 
  751         freepage = list_entry(cc->freepages.next, struct page, lru);
  752         list_del(&freepage->lru);
  753         cc->nr_freepages--;
  754 
  755         return freepage;
  756 }
  757 
  758 /*
  759  * We cannot control nr_migratepages and nr_freepages fully when migration is
  760  * running as migrate_pages() has no knowledge of compact_control. When
  761  * migration is complete, we count the number of pages on the lists by hand.
  762  */
  763 static void update_nr_listpages(struct compact_control *cc)
  764 {
  765         int nr_migratepages = 0;
  766         int nr_freepages = 0;
  767         struct page *page;
  768 
  769         list_for_each_entry(page, &cc->migratepages, lru)
  770                 nr_migratepages++;
  771         list_for_each_entry(page, &cc->freepages, lru)
  772                 nr_freepages++;
  773 
  774         cc->nr_migratepages = nr_migratepages;
  775         cc->nr_freepages = nr_freepages;
  776 }
  777 
  778 /* possible outcome of isolate_migratepages */
  779 typedef enum {
  780         ISOLATE_ABORT,          /* Abort compaction now */
  781         ISOLATE_NONE,           /* No pages isolated, continue scanning */
  782         ISOLATE_SUCCESS,        /* Pages isolated, migrate */
  783 } isolate_migrate_t;
  784 
  785 /*
  786  * Isolate all pages that can be migrated from the block pointed to by
  787  * the migrate scanner within compact_control.
  788  */
  789 static isolate_migrate_t isolate_migratepages(struct zone *zone,
  790                                         struct compact_control *cc)
  791 {
  792         unsigned long low_pfn, end_pfn;
  793 
  794         /* Do not scan outside zone boundaries */
  795         low_pfn = max(cc->migrate_pfn, zone->zone_start_pfn);
  796 
  797         /* Only scan within a pageblock boundary */
  798         end_pfn = ALIGN(low_pfn + pageblock_nr_pages, pageblock_nr_pages);
  799 
  800         /* Do not cross the free scanner or scan within a memory hole */
  801         if (end_pfn > cc->free_pfn || !pfn_valid(low_pfn)) {
  802                 cc->migrate_pfn = end_pfn;
  803                 return ISOLATE_NONE;
  804         }
  805 
  806         /* Perform the isolation */
  807         low_pfn = isolate_migratepages_range(zone, cc, low_pfn, end_pfn, false);
  808         if (!low_pfn || cc->contended)
  809                 return ISOLATE_ABORT;
  810 
  811         cc->migrate_pfn = low_pfn;
  812 
  813         return ISOLATE_SUCCESS;
  814 }
  815 
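      /*
       * Decide whether this compaction run can stop: abort on a fatal signal,
       * complete when the free and migrate scanners meet, or finish early once
       * a page of the requested order is available for the allocation.
       */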
  816 static int compact_finished(struct zone *zone,
  817                             struct compact_control *cc)
  818 {
  819         unsigned int order;
  820         unsigned long watermark;
  821 
  822         if (fatal_signal_pending(current))
  823                 return COMPACT_PARTIAL;
  824 
  825         /* Compaction run completes if the migrate and free scanner meet */
  826         if (cc->free_pfn <= cc->migrate_pfn) {
  827                 /*
  828                  * Mark that the PG_migrate_skip information should be cleared
  829                  * by kswapd when it goes to sleep. kswapd does not set the
  830                  * flag itself, as the decision to clear it should be based
  831                  * directly on an allocation request.
  832                  */
  833                 if (!current_is_kswapd())
  834                         zone->compact_blockskip_flush = true;
  835 
  836                 return COMPACT_COMPLETE;
  837         }
  838 
  839         /*
  840          * order == -1 is expected when compacting via
  841          * /proc/sys/vm/compact_memory
  842          */
  843         if (cc->order == -1)
  844                 return COMPACT_CONTINUE;
  845 
  846         /* Compaction run is not finished if the watermark is not met */
  847         watermark = low_wmark_pages(zone);
  848         watermark += (1 << cc->order);
  849 
  850         if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
  851                 return COMPACT_CONTINUE;
  852 
  853         /* Direct compactor: Is a suitable page free? */
  854         for (order = cc->order; order < MAX_ORDER; order++) {
  855                 struct free_area *area = &zone->free_area[order];
  856 
  857                 /* Job done if page is free of the right migratetype */
  858                 if (!list_empty(&area->free_list[cc->migratetype]))
  859                         return COMPACT_PARTIAL;
  860 
  861                 /* Job done if allocation would set block type */
  862                 if (cc->order >= pageblock_order && area->nr_free)
  863                         return COMPACT_PARTIAL;
  864         }
  865 
  866         return COMPACT_CONTINUE;
  867 }
  868 
  869 /*
  870  * compaction_suitable: Is this suitable to run compaction on this zone now?
  871  * Returns
  872  *   COMPACT_SKIPPED  - If there are too few free pages for compaction
  873  *   COMPACT_PARTIAL  - If the allocation would succeed without compaction
  874  *   COMPACT_CONTINUE - If compaction should run now
  875  */
  876 unsigned long compaction_suitable(struct zone *zone, int order)
  877 {
  878         int fragindex;
  879         unsigned long watermark;
  880 
  881         /*
  882          * order == -1 is expected when compacting via
  883          * /proc/sys/vm/compact_memory
  884          */
  885         if (order == -1)
  886                 return COMPACT_CONTINUE;
  887 
  888         /*
  889          * Watermarks for order-0 must be met for compaction. Note the 2UL.
  890          * This is because during migration, copies of pages need to be
  891          * allocated and for a short time, the footprint is higher
  892          */
  893         watermark = low_wmark_pages(zone) + (2UL << order);
  894         if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
  895                 return COMPACT_SKIPPED;
  896 
  897         /*
  898          * fragmentation index determines if allocation failures are due to
  899          * low memory or external fragmentation
  900          *
  901          * index of -1000 implies allocations might succeed depending on
  902          * watermarks
  903          * index towards 0 implies failure is due to lack of memory
  904          * index towards 1000 implies failure is due to fragmentation
  905          *
  906          * Only compact if a failure would be due to fragmentation.
  907          */
  908         fragindex = fragmentation_index(zone, order);
  909         if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
  910                 return COMPACT_SKIPPED;
  911 
  912         if (fragindex == -1000 && zone_watermark_ok(zone, order, watermark,
  913             0, 0))
  914                 return COMPACT_PARTIAL;
  915 
  916         return COMPACT_CONTINUE;
  917 }
  918 
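      /*
       * Compact a single zone: repeatedly isolate pages to migrate and free
       * pages to migrate them to, then migrate, until compact_finished()
       * reports that the run is done.
       */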
  919 static int compact_zone(struct zone *zone, struct compact_control *cc)
  920 {
  921         int ret;
  922         unsigned long start_pfn = zone->zone_start_pfn;
  923         unsigned long end_pfn = zone->zone_start_pfn + zone->spanned_pages;
  924 
  925         ret = compaction_suitable(zone, cc->order);
  926         switch (ret) {
  927         case COMPACT_PARTIAL:
  928         case COMPACT_SKIPPED:
  929                 /* Compaction is likely to fail */
  930                 return ret;
  931         case COMPACT_CONTINUE:
  932                 /* Fall through to compaction */
  933                 ;
  934         }
  935 
  936         /*
  937          * Set up to move all movable pages to the end of the zone. Use cached
  938          * information on where the scanners should start but check that it
  939          * is initialised by ensuring the values are within zone boundaries.
  940          */
  941         cc->migrate_pfn = zone->compact_cached_migrate_pfn;
  942         cc->free_pfn = zone->compact_cached_free_pfn;
  943         if (cc->free_pfn < start_pfn || cc->free_pfn > end_pfn) {
  944                 cc->free_pfn = end_pfn & ~(pageblock_nr_pages-1);
  945                 zone->compact_cached_free_pfn = cc->free_pfn;
  946         }
  947         if (cc->migrate_pfn < start_pfn || cc->migrate_pfn > end_pfn) {
  948                 cc->migrate_pfn = start_pfn;
  949                 zone->compact_cached_migrate_pfn = cc->migrate_pfn;
  950         }
  951 
  952         /*
  953          * Clear pageblock skip if there were failures recently and compaction
  954          * is about to be retried after being deferred. kswapd does not do
  955          * this reset as it'll reset the cached information when going to sleep.
  956          */
  957         if (compaction_restarting(zone, cc->order) && !current_is_kswapd())
  958                 __reset_isolation_suitable(zone);
  959 
  960         migrate_prep_local();
  961 
  962         while ((ret = compact_finished(zone, cc)) == COMPACT_CONTINUE) {
  963                 unsigned long nr_migrate, nr_remaining;
  964                 int err;
  965 
  966                 switch (isolate_migratepages(zone, cc)) {
  967                 case ISOLATE_ABORT:
  968                         ret = COMPACT_PARTIAL;
  969                         putback_movable_pages(&cc->migratepages);
  970                         cc->nr_migratepages = 0;
  971                         goto out;
  972                 case ISOLATE_NONE:
  973                         continue;
  974                 case ISOLATE_SUCCESS:
  975                         ;
  976                 }
  977 
  978                 nr_migrate = cc->nr_migratepages;
  979                 err = migrate_pages(&cc->migratepages, compaction_alloc,
  980                                 (unsigned long)cc, false,
  981                                 cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC,
  982                                 MR_COMPACTION);
  983                 update_nr_listpages(cc);
  984                 nr_remaining = cc->nr_migratepages;
  985 
  986                 trace_mm_compaction_migratepages(nr_migrate - nr_remaining,
  987                                                 nr_remaining);
  988 
  989                 /* Release isolated pages not migrated */
  990                 if (err) {
  991                         putback_movable_pages(&cc->migratepages);
  992                         cc->nr_migratepages = 0;
  993                         if (err == -ENOMEM) {
  994                                 ret = COMPACT_PARTIAL;
  995                                 goto out;
  996                         }
  997                 }
  998         }
  999 
 1000 out:
 1001         /* Release free pages and check accounting */
 1002         cc->nr_freepages -= release_freepages(&cc->freepages);
 1003         VM_BUG_ON(cc->nr_freepages != 0);
 1004 
 1005         return ret;
 1006 }
 1007 
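      /* Set up a compact_control for a direct compaction request and run compact_zone(). */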
 1008 static unsigned long compact_zone_order(struct zone *zone,
 1009                                  int order, gfp_t gfp_mask,
 1010                                  bool sync, bool *contended)
 1011 {
 1012         unsigned long ret;
 1013         struct compact_control cc = {
 1014                 .nr_freepages = 0,
 1015                 .nr_migratepages = 0,
 1016                 .order = order,
 1017                 .migratetype = allocflags_to_migratetype(gfp_mask),
 1018                 .zone = zone,
 1019                 .sync = sync,
 1020         };
 1021         INIT_LIST_HEAD(&cc.freepages);
 1022         INIT_LIST_HEAD(&cc.migratepages);
 1023 
 1024         ret = compact_zone(zone, &cc);
 1025 
 1026         VM_BUG_ON(!list_empty(&cc.freepages));
 1027         VM_BUG_ON(!list_empty(&cc.migratepages));
 1028 
 1029         *contended = cc.contended;
 1030         return ret;
 1031 }
 1032 
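      /*
       * Compaction is skipped when the zone's fragmentation index is at or
       * below this threshold; tunable via /proc/sys/vm/extfrag_threshold.
       */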
 1033 int sysctl_extfrag_threshold = 500;
 1034 
 1035 /**
 1036  * try_to_compact_pages - Direct compact to satisfy a high-order allocation
 1037  * @zonelist: The zonelist used for the current allocation
 1038  * @order: The order of the current allocation
 1039  * @gfp_mask: The GFP mask of the current allocation
 1040  * @nodemask: The allowed nodes to allocate from
 1041  * @sync: Whether migration is synchronous or not
 1042  * @contended: Return value that is true if compaction was aborted due to lock contention
 1044  *
 1045  * This is the main entry point for direct page compaction.
 1046  */
 1047 unsigned long try_to_compact_pages(struct zonelist *zonelist,
 1048                         int order, gfp_t gfp_mask, nodemask_t *nodemask,
 1049                         bool sync, bool *contended)
 1050 {
 1051         enum zone_type high_zoneidx = gfp_zone(gfp_mask);
 1052         int may_enter_fs = gfp_mask & __GFP_FS;
 1053         int may_perform_io = gfp_mask & __GFP_IO;
 1054         struct zoneref *z;
 1055         struct zone *zone;
 1056         int rc = COMPACT_SKIPPED;
 1057         int alloc_flags = 0;
 1058 
 1059         /* Check if the GFP flags allow compaction */
 1060         if (!order || !may_enter_fs || !may_perform_io)
 1061                 return rc;
 1062 
 1063         count_compact_event(COMPACTSTALL);
 1064 
 1065 #ifdef CONFIG_CMA
 1066         if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
 1067                 alloc_flags |= ALLOC_CMA;
 1068 #endif
 1069         /* Compact each zone in the list */
 1070         for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
 1071                                                                 nodemask) {
 1072                 int status;
 1073 
 1074                 status = compact_zone_order(zone, order, gfp_mask, sync,
 1075                                                 contended);
 1076                 rc = max(status, rc);
 1077 
 1078                 /* If a normal allocation would succeed, stop compacting */
 1079                 if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0,
 1080                                       alloc_flags))
 1081                         break;
 1082         }
 1083 
 1084         return rc;
 1085 }
 1086 
 1087 
 1088 /* Compact all zones within a node */
 1089 static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
 1090 {
 1091         int zoneid;
 1092         struct zone *zone;
 1093 
 1094         for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
 1095 
 1096                 zone = &pgdat->node_zones[zoneid];
 1097                 if (!populated_zone(zone))
 1098                         continue;
 1099 
 1100                 cc->nr_freepages = 0;
 1101                 cc->nr_migratepages = 0;
 1102                 cc->zone = zone;
 1103                 INIT_LIST_HEAD(&cc->freepages);
 1104                 INIT_LIST_HEAD(&cc->migratepages);
 1105 
 1106                 if (cc->order == -1 || !compaction_deferred(zone, cc->order))
 1107                         compact_zone(zone, cc);
 1108 
 1109                 if (cc->order > 0) {
 1110                         int ok = zone_watermark_ok(zone, cc->order,
 1111                                                 low_wmark_pages(zone), 0, 0);
 1112                         if (ok && cc->order >= zone->compact_order_failed)
 1113                                 zone->compact_order_failed = cc->order + 1;
 1114                         /* Currently async compaction is never deferred. */
 1115                         else if (!ok && cc->sync)
 1116                                 defer_compaction(zone, cc->order);
 1117                 }
 1118 
 1119                 VM_BUG_ON(!list_empty(&cc->freepages));
 1120                 VM_BUG_ON(!list_empty(&cc->migratepages));
 1121         }
 1122 
 1123         return 0;
 1124 }
 1125 
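      /* Compact all zones of a node with asynchronous compaction at the given order. */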
 1126 int compact_pgdat(pg_data_t *pgdat, int order)
 1127 {
 1128         struct compact_control cc = {
 1129                 .order = order,
 1130                 .sync = false,
 1131         };
 1132 
 1133         return __compact_pgdat(pgdat, &cc);
 1134 }
 1135 
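      /* Fully compact one node (order == -1, synchronous); used by the sysctl and sysfs triggers. */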
 1136 static int compact_node(int nid)
 1137 {
 1138         struct compact_control cc = {
 1139                 .order = -1,
 1140                 .sync = true,
 1141         };
 1142 
 1143         return __compact_pgdat(NODE_DATA(nid), &cc);
 1144 }
 1145 
 1146 /* Compact all nodes in the system */
 1147 static void compact_nodes(void)
 1148 {
 1149         int nid;
 1150 
 1151         /* Flush pending updates to the LRU lists */
 1152         lru_add_drain_all();
 1153 
 1154         for_each_online_node(nid)
 1155                 compact_node(nid);
 1156 }
 1157 
 1158 /* The written value is actually unused, all memory is compacted */
 1159 int sysctl_compact_memory;
 1160 
 1161 /* This is the entry point for compacting all nodes via /proc/sys/vm */
 1162 int sysctl_compaction_handler(struct ctl_table *table, int write,
 1163                         void __user *buffer, size_t *length, loff_t *ppos)
 1164 {
 1165         if (write)
 1166                 compact_nodes();
 1167 
 1168         return 0;
 1169 }
 1170 
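      /* Handler for /proc/sys/vm/extfrag_threshold. */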
 1171 int sysctl_extfrag_handler(struct ctl_table *table, int write,
 1172                         void __user *buffer, size_t *length, loff_t *ppos)
 1173 {
 1174         proc_dointvec_minmax(table, write, buffer, length, ppos);
 1175 
 1176         return 0;
 1177 }
 1178 
 1179 #if defined(CONFIG_SYSFS) && defined(CONFIG_NUMA)
 1180 ssize_t sysfs_compact_node(struct device *dev,
 1181                         struct device_attribute *attr,
 1182                         const char *buf, size_t count)
 1183 {
 1184         int nid = dev->id;
 1185 
 1186         if (nid >= 0 && nid < nr_node_ids && node_online(nid)) {
 1187                 /* Flush pending updates to the LRU lists */
 1188                 lru_add_drain_all();
 1189 
 1190                 compact_node(nid);
 1191         }
 1192 
 1193         return count;
 1194 }
 1195 static DEVICE_ATTR(compact, S_IWUSR, NULL, sysfs_compact_node);
 1196 
 1197 int compaction_register_node(struct node *node)
 1198 {
 1199         return device_create_file(&node->dev, &dev_attr_compact);
 1200 }
 1201 
 1202 void compaction_unregister_node(struct node *node)
 1203 {
 1204         return device_remove_file(&node->dev, &dev_attr_compact);
 1205 }
 1206 #endif /* CONFIG_SYSFS && CONFIG_NUMA */
 1207 
 1208 #endif /* CONFIG_COMPACTION */

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.