sys/vm/vm_pageout.c
/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)vm_pageout.c	7.4 (Berkeley) 5/7/91
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $FreeBSD: src/sys/vm/vm_pageout.c,v 1.86.2.3 1999/09/05 08:24:36 peter Exp $
 */

/*
 *	The proverbial page-out daemon.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/malloc.h>
#include <sys/signalvar.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <vm/lock.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>

/*
 * System initialization
 */

/* the kernel process "vm_pageout" */
static void vm_pageout __P((void));
static int vm_pageout_clean __P((vm_page_t, int));
static int vm_pageout_scan __P((void));
static int vm_pageout_free_page_calc __P((vm_size_t count));
struct proc *pageproc;

static struct kproc_desc page_kp = {
	"pagedaemon",
	vm_pageout,
	&pageproc
};
SYSINIT_KT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, kproc_start, &page_kp)

#if !defined(NO_SWAPPING)
/* the kernel process "vm_daemon" */
static void vm_daemon __P((void));
static struct proc *vmproc;

static struct kproc_desc vm_kp = {
	"vmdaemon",
	vm_daemon,
	&vmproc
};
SYSINIT_KT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp)
#endif


int vm_pages_needed;		/* Event on which pageout daemon sleeps */

int vm_pageout_pages_needed;	/* flag saying that the pageout daemon needs pages */

extern int npendingio;
#if !defined(NO_SWAPPING)
static int vm_pageout_req_swapout;	/* XXX */
static int vm_daemon_needed;
#endif
extern int nswiodone;
extern int vm_swap_size;
extern int vfs_update_wakeup;
int vm_pageout_algorithm_lru = 0;
#if defined(NO_SWAPPING)
int vm_swapping_enabled = 0;
#else
int vm_swapping_enabled = 1;
#endif

SYSCTL_INT(_vm, VM_PAGEOUT_ALGORITHM, pageout_algorithm,
	CTLFLAG_RW, &vm_pageout_algorithm_lru, 0, "");

#if defined(NO_SWAPPING)
SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swapping_enabled,
	CTLFLAG_RD, &vm_swapping_enabled, 0, "");
#else
SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swapping_enabled,
	CTLFLAG_RW, &vm_swapping_enabled, 0, "");
#endif
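
/*
 * The declarations above export these knobs as the sysctl variables
 * vm.pageout_algorithm and vm.swapping_enabled; the latter is read-only
 * (CTLFLAG_RD) when the kernel is built with NO_SWAPPING.  For example,
 * from userland:
 *
 *	sysctl -w vm.pageout_algorithm=1	# switch to pure LRU aging
 */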
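
/*
 * Upper bound on the number of dirty pages the inactive-queue scan in
 * vm_pageout_scan() will launder (start cleaning) per pass; systems with
 * more than 1800 pages of memory are allowed a larger laundering burst.
 */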
#define MAXLAUNDER	(cnt.v_page_count > 1800 ? 32 : 16)

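/*
 * Default pageout I/O cluster size; vm_pageout() trims
 * vm_pageout_page_count to 8 on machines with fewer than 2000 pages.
 */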
#define VM_PAGEOUT_PAGE_COUNT	16
int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT;

int vm_page_max_wired;		/* XXX max # of wired pages system-wide */

#if !defined(NO_SWAPPING)
typedef void freeer_fcn_t __P((vm_map_t, vm_object_t, vm_pindex_t, int));
static void vm_pageout_map_deactivate_pages __P((vm_map_t, vm_pindex_t));
static freeer_fcn_t vm_pageout_object_deactivate_pages;
static void vm_req_vmdaemon __P((void));
#endif

/*
 * vm_pageout_clean:
 *
 * Clean the page and remove it from the laundry.
 *
 * We set the busy bit to cause potential page faults on this page to
 * block.
 *
 * And we set pageout-in-progress to keep the object from disappearing
 * during pageout.  This guarantees that the page won't move from the
 * inactive queue.  (However, any other page on the inactive queue may
 * move!)
 */
static int
vm_pageout_clean(m, sync)
	vm_page_t m;
	int sync;
{
	register vm_object_t object;
	vm_page_t mc[2*vm_pageout_page_count];
	int pageout_count;
	int i, forward_okay, backward_okay, page_base;
	vm_pindex_t pindex = m->pindex;

	object = m->object;

	/*
	 * If not OBJT_SWAP, additional memory may be needed to do the pageout.
	 * Try to avoid the deadlock.
	 */
	if ((sync != VM_PAGEOUT_FORCE) &&
	    (object->type == OBJT_DEFAULT) &&
	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_pageout_free_min))
		return 0;

	/*
	 * Don't mess with the page if it's busy.
	 */
	if ((!sync && m->hold_count != 0) ||
	    ((m->busy != 0) || (m->flags & PG_BUSY)))
		return 0;

	/*
	 * Try collapsing before it's too late.
	 */
	if (!sync && object->backing_object) {
		vm_object_collapse(object);
	}

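	/*
	 * The target page sits at the midpoint of mc[]; the scan below grows
	 * the cluster forward and backward from it, which is why the array
	 * holds 2*vm_pageout_page_count slots even though at most
	 * vm_pageout_page_count pages are flushed.  page_base tracks the
	 * start of the final cluster within mc[].
	 */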
	mc[vm_pageout_page_count] = m;
	pageout_count = 1;
	page_base = vm_pageout_page_count;
	forward_okay = TRUE;
	if (pindex != 0)
		backward_okay = TRUE;
	else
		backward_okay = FALSE;
	/*
	 * Scan object for clusterable pages.
	 *
	 * We can cluster ONLY if: ->> the page is NOT
	 * clean, wired, busy, held, or mapped into a
	 * buffer, and one of the following:
	 * 1) The page is inactive, or a seldom used
	 *    active page.
	 * -or-
	 * 2) we force the issue.
	 */
	for (i = 1; (i < vm_pageout_page_count) && (forward_okay || backward_okay); i++) {
		vm_page_t p;

		/*
		 * See if forward page is clusterable.
		 */
		if (forward_okay) {
			/*
			 * Stop forward scan at end of object.
			 */
			if ((pindex + i) > object->size) {
				forward_okay = FALSE;
				goto do_backward;
			}
			p = vm_page_lookup(object, pindex + i);
			if (p) {
				if (((p->queue - p->pc) == PQ_CACHE) ||
				    (p->flags & PG_BUSY) || p->busy) {
					forward_okay = FALSE;
					goto do_backward;
				}
				vm_page_test_dirty(p);
				if ((p->dirty & p->valid) != 0 &&
				    ((p->queue == PQ_INACTIVE) ||
				     (sync == VM_PAGEOUT_FORCE)) &&
				    (p->wire_count == 0) &&
				    (p->hold_count == 0)) {
					mc[vm_pageout_page_count + i] = p;
					pageout_count++;
					if (pageout_count == vm_pageout_page_count)
						break;
				} else {
					forward_okay = FALSE;
				}
			} else {
				forward_okay = FALSE;
			}
		}
do_backward:
		/*
		 * See if backward page is clusterable.
		 */
		if (backward_okay) {
			/*
			 * Stop backward scan at beginning of object.
			 */
			if ((pindex - i) == 0) {
				backward_okay = FALSE;
			}
			p = vm_page_lookup(object, pindex - i);
			if (p) {
				if (((p->queue - p->pc) == PQ_CACHE) ||
				    (p->flags & PG_BUSY) || p->busy) {
					backward_okay = FALSE;
					continue;
				}
				vm_page_test_dirty(p);
				if ((p->dirty & p->valid) != 0 &&
				    ((p->queue == PQ_INACTIVE) ||
				     (sync == VM_PAGEOUT_FORCE)) &&
				    (p->wire_count == 0) &&
				    (p->hold_count == 0)) {
					mc[vm_pageout_page_count - i] = p;
					pageout_count++;
					page_base--;
					if (pageout_count == vm_pageout_page_count)
						break;
				} else {
					backward_okay = FALSE;
				}
			} else {
				backward_okay = FALSE;
			}
		}
	}

	/*
	 * we allow reads during pageouts...
	 */
	for (i = page_base; i < (page_base + pageout_count); i++) {
		mc[i]->flags |= PG_BUSY;
		vm_page_protect(mc[i], VM_PROT_READ);
	}

	return vm_pageout_flush(&mc[page_base], pageout_count, sync);
}

int
vm_pageout_flush(mc, count, sync)
	vm_page_t *mc;
	int count;
	int sync;
{
	register vm_object_t object;
	int pageout_status[count];
	int anyok = 0;
	int i;

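	/*
	 * paging_in_progress is bumped once per page; each non-pending
	 * status below drops one reference via vm_object_pip_wakeup(),
	 * while pending (async) pageouts keep theirs until the I/O
	 * completes.
	 */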
	object = mc[0]->object;
	object->paging_in_progress += count;

	vm_pager_put_pages(object, mc, count,
	    ((sync || (object == kernel_object)) ? TRUE : FALSE),
	    pageout_status);

	for (i = 0; i < count; i++) {
		vm_page_t mt = mc[i];

		switch (pageout_status[i]) {
		case VM_PAGER_OK:
			++anyok;
			break;
		case VM_PAGER_PEND:
			++anyok;
			break;
		case VM_PAGER_BAD:
			/*
			 * Page outside of range of object.  Right now we
			 * essentially lose the changes by pretending it
			 * worked.
			 */
			pmap_clear_modify(VM_PAGE_TO_PHYS(mt));
			mt->dirty = 0;
			break;
		case VM_PAGER_ERROR:
		case VM_PAGER_FAIL:
			/*
			 * If the page couldn't be paged out, then reactivate
			 * it so it doesn't clog the inactive list.  (We
			 * will try paging it out again later.)
			 */
			if (mt->queue == PQ_INACTIVE)
				vm_page_activate(mt);
			break;
		case VM_PAGER_AGAIN:
			break;
		}

		/*
		 * If the operation is still going, leave the page busy to
		 * block all other accesses.  Also, leave the paging in
		 * progress indicator set so that we don't attempt an object
		 * collapse.
		 */
		if (pageout_status[i] != VM_PAGER_PEND) {
			vm_object_pip_wakeup(object);
			PAGE_WAKEUP(mt);
		}
	}
	return anyok;
}

#if !defined(NO_SWAPPING)
/*
 *	vm_pageout_object_deactivate_pages
 *
 *	deactivate enough pages to satisfy the inactive target
 *	requirements or if vm_page_proc_limit is set, then
 *	deactivate all of the pages in the object and its
 *	backing_objects.
 *
 *	The object and map must be locked.
 */
static void
vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only)
	vm_map_t map;
	vm_object_t object;
	vm_pindex_t desired;
	int map_remove_only;
{
	register vm_page_t p, next;
	int rcount;
	int remove_mode;
	int s;

	if (object->type == OBJT_DEVICE)
		return;

	while (object) {
		if (vm_map_pmap(map)->pm_stats.resident_count <= desired)
			return;
		if (object->paging_in_progress)
			return;

		remove_mode = map_remove_only;
		if (object->shadow_count > 1)
			remove_mode = 1;
		/*
		 * scan the object's entire memory queue
		 */
		rcount = object->resident_page_count;
		p = TAILQ_FIRST(&object->memq);
		while (p && (rcount-- > 0)) {
			int refcount;
			if (vm_map_pmap(map)->pm_stats.resident_count <= desired)
				return;
			next = TAILQ_NEXT(p, listq);
			cnt.v_pdpages++;
			if (p->wire_count != 0 ||
			    p->hold_count != 0 ||
			    p->busy != 0 ||
			    (p->flags & PG_BUSY) ||
			    !pmap_page_exists(vm_map_pmap(map), VM_PAGE_TO_PHYS(p))) {
				p = next;
				continue;
			}

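			/*
			 * pmap_ts_referenced() test-and-clears the hardware
			 * referenced bits across all of the page's mappings;
			 * a nonzero return means the page was used recently.
			 */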
			refcount = pmap_ts_referenced(VM_PAGE_TO_PHYS(p));
			if (refcount) {
				p->flags |= PG_REFERENCED;
			} else if (p->flags & PG_REFERENCED) {
				refcount = 1;
			}

			if ((p->queue != PQ_ACTIVE) &&
			    (p->flags & PG_REFERENCED)) {
				vm_page_activate(p);
				p->act_count += refcount;
				p->flags &= ~PG_REFERENCED;
			} else if (p->queue == PQ_ACTIVE) {
				if ((p->flags & PG_REFERENCED) == 0) {
					p->act_count -= min(p->act_count, ACT_DECLINE);
					if (!remove_mode && (vm_pageout_algorithm_lru || (p->act_count == 0))) {
						vm_page_protect(p, VM_PROT_NONE);
						vm_page_deactivate(p);
					} else {
						s = splvm();
						TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
						TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
						splx(s);
					}
				} else {
					p->flags &= ~PG_REFERENCED;
					if (p->act_count < (ACT_MAX - ACT_ADVANCE))
						p->act_count += ACT_ADVANCE;
					s = splvm();
					TAILQ_REMOVE(&vm_page_queue_active, p, pageq);
					TAILQ_INSERT_TAIL(&vm_page_queue_active, p, pageq);
					splx(s);
				}
			} else if (p->queue == PQ_INACTIVE) {
				vm_page_protect(p, VM_PROT_NONE);
			}
			p = next;
		}
		object = object->backing_object;
	}
	return;
}

/*
 * Deactivate some number of pages in a map; try to do it fairly, but
 * that is really hard to do.
 */
static void
vm_pageout_map_deactivate_pages(map, desired)
	vm_map_t map;
	vm_pindex_t desired;
{
	vm_map_entry_t tmpe;
	vm_object_t obj, bigobj;

	vm_map_reference(map);
	if (!lock_try_write(&map->lock)) {
		vm_map_deallocate(map);
		return;
	}

	bigobj = NULL;

	/*
	 * first, search out the biggest object, and try to free pages from
	 * that.
	 */
	tmpe = map->header.next;
	while (tmpe != &map->header) {
		if ((tmpe->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) == 0) {
			obj = tmpe->object.vm_object;
			if ((obj != NULL) && (obj->shadow_count <= 1) &&
			    ((bigobj == NULL) ||
			     (bigobj->resident_page_count < obj->resident_page_count))) {
				bigobj = obj;
			}
		}
		tmpe = tmpe->next;
	}

	if (bigobj)
		vm_pageout_object_deactivate_pages(map, bigobj, desired, 0);

	/*
	 * Next, hunt around for other pages to deactivate.  We actually
	 * do this search sort of wrong -- .text first is not the best idea.
	 */
	tmpe = map->header.next;
	while (tmpe != &map->header) {
		if (vm_map_pmap(map)->pm_stats.resident_count <= desired)
			break;
		if ((tmpe->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) == 0) {
			obj = tmpe->object.vm_object;
			if (obj)
				vm_pageout_object_deactivate_pages(map, obj, desired, 0);
		}
		tmpe = tmpe->next;
	}

	/*
	 * Remove all mappings if a process is swapped out; this will free
	 * page table pages.
	 */
	if (desired == 0)
		pmap_remove(vm_map_pmap(map),
		    VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
	vm_map_unlock(map);
	vm_map_deallocate(map);
	return;
}
#endif

/*
 *	vm_pageout_scan does the dirty work for the pageout daemon.
 */
static int
vm_pageout_scan()
{
	vm_page_t m, next;
	int page_shortage, addl_page_shortage, maxscan, maxlaunder, pcount;
	int pages_freed;
	struct proc *p, *bigproc;
	vm_offset_t size, bigsize;
	vm_object_t object;
	int force_wakeup = 0;
	int vnodes_skipped = 0;
	int s;

	/*
	 * Start scanning the inactive queue for pages we can free.  We keep
	 * scanning until we have enough free pages or we have scanned through
	 * the entire queue.  If we encounter dirty pages, we start cleaning
	 * them.
	 */

	pages_freed = 0;
	addl_page_shortage = 0;

	maxlaunder = (cnt.v_inactive_target > MAXLAUNDER) ?
	    MAXLAUNDER : cnt.v_inactive_target;
rescan0:
	maxscan = cnt.v_inactive_count;
	for (m = TAILQ_FIRST(&vm_page_queue_inactive);
	     (m != NULL) && (maxscan-- > 0) &&
	     ((cnt.v_cache_count + cnt.v_free_count) <
	      (cnt.v_cache_min + cnt.v_free_target));
	     m = next) {

		cnt.v_pdpages++;

		if (m->queue != PQ_INACTIVE) {
			goto rescan0;
		}

		next = TAILQ_NEXT(m, pageq);

		if (m->hold_count) {
			s = splvm();
			TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
			splx(s);
			addl_page_shortage++;
			continue;
		}
		/*
		 * Don't mess with busy pages; keep them at the front of the
		 * queue, as they are most likely being paged out.
		 */
		if (m->busy || (m->flags & PG_BUSY)) {
			addl_page_shortage++;
			continue;
		}

		if (m->object->ref_count == 0) {
			m->flags &= ~PG_REFERENCED;
			pmap_clear_reference(VM_PAGE_TO_PHYS(m));
		} else if (((m->flags & PG_REFERENCED) == 0) &&
		    pmap_ts_referenced(VM_PAGE_TO_PHYS(m))) {
			vm_page_activate(m);
			continue;
		}

		if ((m->flags & PG_REFERENCED) != 0) {
			m->flags &= ~PG_REFERENCED;
			pmap_clear_reference(VM_PAGE_TO_PHYS(m));
			vm_page_activate(m);
			continue;
		}

		if (m->dirty == 0) {
			vm_page_test_dirty(m);
		} else if (m->dirty != 0) {
			m->dirty = VM_PAGE_BITS_ALL;
		}

		if (m->valid == 0) {
			vm_page_protect(m, VM_PROT_NONE);
			vm_page_free(m);
			cnt.v_dfree++;
			++pages_freed;
		} else if (m->dirty == 0) {
			vm_page_cache(m);
			++pages_freed;
		} else if (maxlaunder > 0) {
			int written;
			struct vnode *vp = NULL;

			object = m->object;
			if (object->flags & OBJ_DEAD) {
				s = splvm();
				TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
				TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
				splx(s);
				continue;
			}

			if (object->type == OBJT_VNODE) {
				vp = object->handle;
				if (VOP_ISLOCKED(vp) || vget(vp, 1)) {
					if ((m->queue == PQ_INACTIVE) &&
					    (m->hold_count == 0) &&
					    (m->busy == 0) &&
					    (m->flags & PG_BUSY) == 0) {
						s = splvm();
						TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
						TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
						splx(s);
					}
					if (object->flags & OBJ_MIGHTBEDIRTY)
						++vnodes_skipped;
					continue;
				}

				/*
				 * The page might have been moved to another
				 * queue during potential blocking in vget()
				 * above.
				 */
				if (m->queue != PQ_INACTIVE) {
					if (object->flags & OBJ_MIGHTBEDIRTY)
						++vnodes_skipped;
					vput(vp);
					continue;
				}

				/*
				 * The page may have been busied during the
				 * blocking in vget(); we don't move the page
				 * back onto the end of the queue, as
				 * statistics are more correct if we don't.
				 */
				if (m->busy || (m->flags & PG_BUSY)) {
					vput(vp);
					continue;
				}

				/*
				 * If the page has become held, then skip it
				 */
				if (m->hold_count) {
					s = splvm();
					TAILQ_REMOVE(&vm_page_queue_inactive, m, pageq);
					TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
					splx(s);
					if (object->flags & OBJ_MIGHTBEDIRTY)
						++vnodes_skipped;
					vput(vp);
					continue;
				}
			}

			/*
			 * If a page is dirty, then it is either being washed
			 * (but not yet cleaned) or it is still in the
			 * laundry.  If it is still in the laundry, then we
			 * start the cleaning operation.
			 */
			written = vm_pageout_clean(m, 0);

			if (vp)
				vput(vp);

			maxlaunder -= written;
		}
	}

	/*
	 * Compute the page shortage.  If we are still very low on memory be
	 * sure that we will move a minimal amount of pages from active to
	 * inactive.
	 */

	page_shortage = (cnt.v_inactive_target + cnt.v_cache_min) -
	    (cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count);
	if (page_shortage <= 0) {
		if (pages_freed == 0) {
			page_shortage = cnt.v_free_min - cnt.v_free_count;
		} else {
			page_shortage = 1;
		}
	}
	if (addl_page_shortage) {
		if (page_shortage < 0)
			page_shortage = 0;
		page_shortage += addl_page_shortage;
	}

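	/*
	 * Scan the active queue, rotating referenced pages to the tail and
	 * deactivating pages whose act_count has decayed to zero (or whose
	 * object is no longer referenced), until the shortage is covered.
	 */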
	pcount = cnt.v_active_count;
	m = TAILQ_FIRST(&vm_page_queue_active);
	while ((m != NULL) && (pcount-- > 0) && (page_shortage > 0)) {
		int refcount;

		if (m->queue != PQ_ACTIVE) {
			break;
		}

		next = TAILQ_NEXT(m, pageq);
		/*
		 * Don't deactivate pages that are busy.
		 */
		if ((m->busy != 0) ||
		    (m->flags & PG_BUSY) ||
		    (m->hold_count != 0)) {
			s = splvm();
			TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			splx(s);
			m = next;
			continue;
		}

		/*
		 * The count for pagedaemon pages is done after checking the
		 * page for eligibility...
		 */
		cnt.v_pdpages++;

		refcount = 0;
		if (m->object->ref_count != 0) {
			if (m->flags & PG_REFERENCED) {
				refcount += 1;
			}
			refcount += pmap_ts_referenced(VM_PAGE_TO_PHYS(m));
			if (refcount) {
				m->act_count += ACT_ADVANCE + refcount;
				if (m->act_count > ACT_MAX)
					m->act_count = ACT_MAX;
			}
		}

		m->flags &= ~PG_REFERENCED;

		if (refcount && (m->object->ref_count != 0)) {
			s = splvm();
			TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			splx(s);
		} else {
			m->act_count -= min(m->act_count, ACT_DECLINE);
			if (vm_pageout_algorithm_lru ||
			    (m->object->ref_count == 0) || (m->act_count == 0)) {
				--page_shortage;
				if (m->object->ref_count == 0) {
					vm_page_protect(m, VM_PROT_NONE);
					if (m->dirty == 0)
						vm_page_cache(m);
					else
						vm_page_deactivate(m);
				} else {
					vm_page_deactivate(m);
				}
			} else {
				s = splvm();
				TAILQ_REMOVE(&vm_page_queue_active, m, pageq);
				TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
				splx(s);
			}
		}
		m = next;
	}

	s = splvm();
	/*
	 * We try to maintain some *really* free pages; this allows interrupt
	 * code to be guaranteed space.
	 */
	while (cnt.v_free_count < cnt.v_free_reserved) {
		static int cache_rover = 0;
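		/*
		 * The rover steps through the page-color cache queues with a
		 * prime stride (PQ_PRIME2, masked by PQ_L2_MASK) so that
		 * frees are spread across the colors instead of draining a
		 * single queue.
		 */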
		m = vm_page_list_find(PQ_CACHE, cache_rover);
		if (!m)
			break;
		cache_rover = (cache_rover + PQ_PRIME2) & PQ_L2_MASK;
		vm_page_free(m);
		cnt.v_dfree++;
	}
	splx(s);

	/*
	 * If we didn't get enough free pages, and we have skipped a vnode
	 * in a writeable object, wakeup the sync daemon.  And kick swapout
	 * if we did not get enough free pages.
	 */
	if ((cnt.v_cache_count + cnt.v_free_count) <
	    (cnt.v_free_target + cnt.v_cache_min)) {
		if (vnodes_skipped &&
		    (cnt.v_cache_count + cnt.v_free_count) < cnt.v_free_min) {
			if (!vfs_update_wakeup) {
				vfs_update_wakeup = 1;
				wakeup(&vfs_update_wakeup);
			}
		}
#if !defined(NO_SWAPPING)
		if (vm_swapping_enabled &&
		    (cnt.v_free_count + cnt.v_cache_count < cnt.v_free_target)) {
			vm_req_vmdaemon();
			vm_pageout_req_swapout = 1;
		}
#endif
	}

	/*
	 * make sure that we have swap space -- if we are low on memory and
	 * swap -- then kill the biggest process.
	 */
	if ((vm_swap_size == 0 || swap_pager_full) &&
	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min)) {
		bigproc = NULL;
		bigsize = 0;
		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
			/*
			 * if this is a system process, skip it
			 */
			if ((p->p_flag & P_SYSTEM) || (p->p_pid == 1) ||
			    ((p->p_pid < 48) && (vm_swap_size != 0))) {
				continue;
			}
			/*
			 * if the process is in a non-running type state,
			 * don't touch it.
			 */
			if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
				continue;
			}
			/*
			 * get the process size
			 */
			size = p->p_vmspace->vm_pmap.pm_stats.resident_count;
			/*
			 * if this process is bigger than the biggest one,
			 * remember it.
			 */
			if (size > bigsize) {
				bigproc = p;
				bigsize = size;
			}
		}
		if (bigproc != NULL) {
			killproc(bigproc, "out of swap space");
			bigproc->p_estcpu = 0;
			bigproc->p_nice = PRIO_MIN;
			resetpriority(bigproc);
			wakeup(&cnt.v_free_count);
		}
	}
	return force_wakeup;
}

static int
vm_pageout_free_page_calc(count)
	vm_size_t count;
{
	if (count < cnt.v_page_count)
		return 0;
	/*
	 * free_reserved needs to include enough for the largest swap pager
	 * structures plus enough for any pv_entry structs when paging.
	 */
	if (cnt.v_page_count > 1024)
		cnt.v_free_min = 4 + (cnt.v_page_count - 1024) / 200;
	else
		cnt.v_free_min = 4;
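	/*
	 * Reserve enough pages for two maximal-sized buffers in transit,
	 * plus the interrupt-time free reserve.
	 */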
	cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
	    cnt.v_interrupt_free_min;
	cnt.v_free_reserved = vm_pageout_page_count +
	    cnt.v_pageout_free_min + (count / 768) + PQ_L2_SIZE;
	cnt.v_free_min += cnt.v_free_reserved;
	return 1;
}

#ifdef unused
int
vm_pageout_free_pages(object, add)
	vm_object_t object;
	int add;
{
	return vm_pageout_free_page_calc(object->size);
}
#endif

/*
 *	vm_pageout is the high level pageout daemon.
 */
static void
vm_pageout()
{
	/*
	 * Initialize some paging parameters.
	 */

	cnt.v_interrupt_free_min = 2;
	if (cnt.v_page_count < 2000)
		vm_pageout_page_count = 8;

	vm_pageout_free_page_calc(cnt.v_page_count);
	/*
	 * free_reserved needs to include enough for the largest swap pager
	 * structures plus enough for any pv_entry structs when paging.
	 */
	cnt.v_free_target = 3 * cnt.v_free_min + cnt.v_free_reserved;

	if (cnt.v_free_count > 1024) {
		cnt.v_cache_max = (cnt.v_free_count - 1024) / 2;
		cnt.v_cache_min = (cnt.v_free_count - 1024) / 8;
		cnt.v_inactive_target = 2*cnt.v_cache_min + 192;
	} else {
		cnt.v_cache_min = 0;
		cnt.v_cache_max = 0;
		cnt.v_inactive_target = cnt.v_free_count / 4;
	}

	/* XXX does not really belong here */
	if (vm_page_max_wired == 0)
		vm_page_max_wired = cnt.v_free_count / 3;

	swap_pager_swap_init();
	/*
	 * The pageout daemon is never done, so loop forever.
	 */
	while (TRUE) {
		int inactive_target;
		int s = splvm();
		if (!vm_pages_needed ||
		    ((cnt.v_free_count + cnt.v_cache_count) > cnt.v_free_min)) {
			vm_pages_needed = 0;
			tsleep(&vm_pages_needed, PVM, "psleep", 0);
		} else if (!vm_pages_needed) {
			tsleep(&vm_pages_needed, PVM, "psleep", hz/10);
		}
		inactive_target =
		    (cnt.v_page_count - cnt.v_wire_count) / 4;
		if (inactive_target < 2*cnt.v_free_min)
			inactive_target = 2*cnt.v_free_min;
		cnt.v_inactive_target = inactive_target;
		if (vm_pages_needed)
			cnt.v_pdwakeups++;
		vm_pages_needed = 0;
		splx(s);
		vm_pager_sync();
		vm_pageout_scan();
		vm_pager_sync();
		wakeup(&cnt.v_free_count);
	}
}

void
pagedaemon_wakeup()
{
	if (!vm_pages_needed && curproc != pageproc) {
		vm_pages_needed++;
		wakeup(&vm_pages_needed);
	}
}

#if !defined(NO_SWAPPING)
static void
vm_req_vmdaemon()
{
	static int lastrun = 0;

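	/*
	 * Rate-limit wakeups of the vmdaemon to at most one per second;
	 * the (ticks < lastrun) test recovers if the tick counter wraps.
	 */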
	if ((ticks > (lastrun + hz)) || (ticks < lastrun)) {
		wakeup(&vm_daemon_needed);
		lastrun = ticks;
	}
}

static void
vm_daemon()
{
	vm_object_t object;
	struct proc *p;

	while (TRUE) {
		tsleep(&vm_daemon_needed, PUSER, "psleep", 0);
		if (vm_pageout_req_swapout) {
			swapout_procs();
			vm_pageout_req_swapout = 0;
		}
		/*
		 * Scan the processes: deactivate the pages of any process
		 * that exceeds its RSS rlimit or is swapped out.
		 */

		for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
			quad_t limit;
			vm_offset_t size;

			/*
			 * if this is a system process or if we have already
			 * looked at this process, skip it.
			 */
			if (p->p_flag & (P_SYSTEM | P_WEXIT)) {
				continue;
			}
			/*
			 * if the process is in a non-running type state,
			 * don't touch it.
			 */
			if (p->p_stat != SRUN && p->p_stat != SSLEEP) {
				continue;
			}
			/*
			 * get a limit
			 */
			limit = qmin(p->p_rlimit[RLIMIT_RSS].rlim_cur,
			    p->p_rlimit[RLIMIT_RSS].rlim_max);

			/*
			 * Let processes that are swapped out really be
			 * swapped out: set the limit to nothing, which will
			 * force a swap-out.
			 */
			if ((p->p_flag & P_INMEM) == 0)
				limit = 0;	/* XXX */

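			/*
			 * Compare the resident size in bytes against the
			 * byte limit, but hand the deactivation code a page
			 * count (limit >> PAGE_SHIFT) as its target.
			 */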
			size = p->p_vmspace->vm_pmap.pm_stats.resident_count * PAGE_SIZE;
			if (limit >= 0 && size >= limit) {
				vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map,
				    (vm_pindex_t)(limit >> PAGE_SHIFT));
			}
		}

		/*
		 * we remove cached objects that have no RSS...
		 */
restart:
		object = TAILQ_FIRST(&vm_object_cached_list);
		while (object) {
			/*
			 * if there are no resident pages -- get rid of the
			 * object
			 */
			if (object->resident_page_count == 0) {
				vm_object_reference(object);
				pager_cache(object, FALSE);
				goto restart;
			}
			object = TAILQ_NEXT(object, cached_list);
		}
	}
}
#endif