sys/vm/vm_swapout.c
/*-
 * SPDX-License-Identifier: (BSD-4-Clause AND MIT-CMU)
 *
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 2005 Yahoo! Technologies Norway AS
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_pageout.c	7.4 (Berkeley) 5/7/91
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_kstack_pages.h"
#include "opt_kstack_max_pages.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/limits.h>
#include <sys/kernel.h>
#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/mount.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/refcount.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/smp.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/swap_pager.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

/* the kernel process "vm_daemon" */
static void vm_daemon(void);
static struct proc *vmproc;

static struct kproc_desc vm_kp = {
	"vmdaemon",
	vm_daemon,
	&vmproc
};
SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp);

static int vm_swap_enabled = 1;
static int vm_swap_idle_enabled = 0;

SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RW,
    &vm_swap_enabled, 0,
    "Enable entire process swapout");
SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, CTLFLAG_RW,
    &vm_swap_idle_enabled, 0,
    "Allow swapout on idle criteria");

/*
 * Swap_idle_threshold1 is the guaranteed swapped in time for a process
 */
static int swap_idle_threshold1 = 2;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold1, CTLFLAG_RW,
    &swap_idle_threshold1, 0,
    "Guaranteed swapped in time for a process");

/*
 * Swap_idle_threshold2 is the time that a process can be idle before
 * it will be swapped out, if idle swapping is enabled.
 */
static int swap_idle_threshold2 = 10;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2, CTLFLAG_RW,
    &swap_idle_threshold2, 0,
    "Time before a process will be swapped out");

static int vm_pageout_req_swapout;	/* XXX */
static int vm_daemon_needed;
static struct mtx vm_daemon_mtx;
/* Allow for use by vm_pageout before vm_daemon is initialized. */
MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF);

static int swapped_cnt;
static int swap_inprogress;	/* Pending swap-ins done outside swapper. */
static int last_swapin;

static void swapclear(struct proc *);
static int swapout(struct proc *);
static void vm_swapout_map_deactivate_pages(vm_map_t, long);
static void vm_swapout_object_deactivate(pmap_t, vm_object_t, long);
static void swapout_procs(int action);
static void vm_req_vmdaemon(int req);
static void vm_thread_swapout(struct thread *td);

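/*
 * Try to deactivate a single page: skip it if it is wired, cannot be
 * busied, or is not mapped by the given pmap.  An unreferenced inactive
 * page has its mappings removed; an unreferenced active page is
 * additionally deactivated, but only when "unmap" permits it.
 */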
static void
vm_swapout_object_deactivate_page(pmap_t pmap, vm_page_t m, bool unmap)
{

	/*
	 * Ignore unreclaimable wired pages.  Repeat the check after busying
	 * since a busy holder may wire the page.
	 */
	if (vm_page_wired(m) || !vm_page_tryxbusy(m))
		return;

	if (vm_page_wired(m) || !pmap_page_exists_quick(pmap, m)) {
		vm_page_xunbusy(m);
		return;
	}
	if (!pmap_is_referenced(m)) {
		if (!vm_page_active(m))
			(void)vm_page_try_remove_all(m);
		else if (unmap && vm_page_try_remove_all(m))
			vm_page_deactivate(m);
	}
	vm_page_xunbusy(m);
}

/*
 * vm_swapout_object_deactivate
 *
 * Deactivate enough pages to satisfy the inactive target
 * requirements.
 *
 * The object and map must be locked.
 */
static void
vm_swapout_object_deactivate(pmap_t pmap, vm_object_t first_object,
    long desired)
{
	vm_object_t backing_object, object;
	vm_page_t m;
	bool unmap;

	VM_OBJECT_ASSERT_LOCKED(first_object);
	if ((first_object->flags & OBJ_FICTITIOUS) != 0)
		return;
	for (object = first_object;; object = backing_object) {
		if (pmap_resident_count(pmap) <= desired)
			goto unlock_return;
		VM_OBJECT_ASSERT_LOCKED(object);
		if ((object->flags & OBJ_UNMANAGED) != 0 ||
		    blockcount_read(&object->paging_in_progress) > 0)
			goto unlock_return;

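		/*
		 * If the object is shadowed by more than one object, its
		 * pages may be shared with other mappings; avoid unmapping
		 * active pages in that case.
		 */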
		unmap = true;
		if (object->shadow_count > 1)
			unmap = false;

		/*
		 * Scan the object's entire memory queue.
		 */
		TAILQ_FOREACH(m, &object->memq, listq) {
			if (pmap_resident_count(pmap) <= desired)
				goto unlock_return;
			if (should_yield())
				goto unlock_return;
			vm_swapout_object_deactivate_page(pmap, m, unmap);
		}
		if ((backing_object = object->backing_object) == NULL)
			goto unlock_return;
		VM_OBJECT_RLOCK(backing_object);
		if (object != first_object)
			VM_OBJECT_RUNLOCK(object);
	}
unlock_return:
	if (object != first_object)
		VM_OBJECT_RUNLOCK(object);
}

/*
 * Deactivate some number of pages in a map; try to do it fairly, but
 * that is really hard to do.
 */
static void
vm_swapout_map_deactivate_pages(vm_map_t map, long desired)
{
	vm_map_entry_t tmpe;
	vm_object_t obj, bigobj;
	int nothingwired;

	if (!vm_map_trylock_read(map))
		return;

	bigobj = NULL;
	nothingwired = TRUE;

	/*
	 * first, search out the biggest object, and try to free pages from
	 * that.
	 */
	VM_MAP_ENTRY_FOREACH(tmpe, map) {
		if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
			obj = tmpe->object.vm_object;
			if (obj != NULL && VM_OBJECT_TRYRLOCK(obj)) {
				if (obj->shadow_count <= 1 &&
				    (bigobj == NULL ||
				    bigobj->resident_page_count <
				    obj->resident_page_count)) {
					if (bigobj != NULL)
						VM_OBJECT_RUNLOCK(bigobj);
					bigobj = obj;
				} else
					VM_OBJECT_RUNLOCK(obj);
			}
		}
		if (tmpe->wired_count > 0)
			nothingwired = FALSE;
	}

	if (bigobj != NULL) {
		vm_swapout_object_deactivate(map->pmap, bigobj, desired);
		VM_OBJECT_RUNLOCK(bigobj);
	}
	/*
	 * Next, hunt around for other pages to deactivate.  We actually
	 * do this search sort of wrong -- .text first is not the best idea.
	 */
	VM_MAP_ENTRY_FOREACH(tmpe, map) {
		if (pmap_resident_count(vm_map_pmap(map)) <= desired)
			break;
		if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
			obj = tmpe->object.vm_object;
			if (obj != NULL) {
				VM_OBJECT_RLOCK(obj);
				vm_swapout_object_deactivate(map->pmap, obj,
				    desired);
				VM_OBJECT_RUNLOCK(obj);
			}
		}
	}

	/*
	 * Remove all mappings if a process is swapped out, this will free page
	 * table pages.
	 */
	if (desired == 0 && nothingwired) {
		pmap_remove(vm_map_pmap(map), vm_map_min(map),
		    vm_map_max(map));
	}

	vm_map_unlock_read(map);
}

/*
 * Swap out requests
 */
#define	VM_SWAP_NORMAL	1
#define	VM_SWAP_IDLE	2

void
vm_swapout_run(void)
{

	if (vm_swap_enabled)
		vm_req_vmdaemon(VM_SWAP_NORMAL);
}

/*
 * Idle process swapout -- run once per second when pagedaemons are
 * reclaiming pages.
 */
void
vm_swapout_run_idle(void)
{
	static long lsec;

	if (!vm_swap_idle_enabled || time_second == lsec)
		return;
	vm_req_vmdaemon(VM_SWAP_IDLE);
	lsec = time_second;
}

static void
vm_req_vmdaemon(int req)
{
	static int lastrun = 0;

	mtx_lock(&vm_daemon_mtx);
	vm_pageout_req_swapout |= req;
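	/*
	 * Wake the daemon at most once per second; the second test
	 * handles wraparound of the ticks counter.
	 */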
	if ((ticks > (lastrun + hz)) || (ticks < lastrun)) {
		wakeup(&vm_daemon_needed);
		lastrun = ticks;
	}
	mtx_unlock(&vm_daemon_mtx);
}

static void
vm_daemon(void)
{
	struct rlimit rsslim;
	struct proc *p;
	struct thread *td;
	struct vmspace *vm;
	int breakout, swapout_flags, tryagain, attempts;
#ifdef RACCT
	uint64_t rsize, ravailable;
#endif

	while (TRUE) {
		mtx_lock(&vm_daemon_mtx);
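		/*
		 * Sleep until a swapout is requested; with racct enabled,
		 * wake up at least once per second to re-check RSS limits.
		 */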
		msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep",
#ifdef RACCT
		    racct_enable ? hz : 0
#else
		    0
#endif
		);
		swapout_flags = vm_pageout_req_swapout;
		vm_pageout_req_swapout = 0;
		mtx_unlock(&vm_daemon_mtx);
		if (swapout_flags != 0) {
			/*
			 * Drain the per-CPU page queue batches as a deadlock
			 * avoidance measure.
			 */
			if ((swapout_flags & VM_SWAP_NORMAL) != 0)
				vm_page_pqbatch_drain();
			swapout_procs(swapout_flags);
		}

		/*
		 * Scan the processes for those exceeding their rlimits or
		 * that are swapped out -- deactivate pages.
		 */
		tryagain = 0;
		attempts = 0;
again:
		attempts++;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			vm_pindex_t limit, size;

			/*
			 * Skip system processes and processes that are
			 * exiting, execing, or not yet fully constructed.
			 */
			PROC_LOCK(p);
			if (p->p_state != PRS_NORMAL ||
			    p->p_flag & (P_INEXEC | P_SYSTEM | P_WEXIT)) {
				PROC_UNLOCK(p);
				continue;
			}
			/*
			 * If the process is in a non-running type state,
			 * don't touch it.
			 */
			breakout = 0;
			FOREACH_THREAD_IN_PROC(p, td) {
				thread_lock(td);
				if (!TD_ON_RUNQ(td) &&
				    !TD_IS_RUNNING(td) &&
				    !TD_IS_SLEEPING(td) &&
				    !TD_IS_SUSPENDED(td)) {
					thread_unlock(td);
					breakout = 1;
					break;
				}
				thread_unlock(td);
			}
			if (breakout) {
				PROC_UNLOCK(p);
				continue;
			}
			/*
			 * get a limit
			 */
			lim_rlimit_proc(p, RLIMIT_RSS, &rsslim);
			limit = OFF_TO_IDX(
			    qmin(rsslim.rlim_cur, rsslim.rlim_max));

			/*
			 * Let processes that are swapped out really be
			 * swapped out: set the limit to nothing to force
			 * a swap-out.
			 */
			if ((p->p_flag & P_INMEM) == 0)
				limit = 0;	/* XXX */
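			/*
			 * Take a reference on the vmspace and hold the
			 * process so that both remain valid once the
			 * process and allproc locks are dropped below.
			 */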
			vm = vmspace_acquire_ref(p);
			_PHOLD_LITE(p);
			PROC_UNLOCK(p);
			if (vm == NULL) {
				PRELE(p);
				continue;
			}
			sx_sunlock(&allproc_lock);

			size = vmspace_resident_count(vm);
			if (size >= limit) {
				vm_swapout_map_deactivate_pages(
				    &vm->vm_map, limit);
				size = vmspace_resident_count(vm);
			}
#ifdef RACCT
			if (racct_enable) {
				rsize = IDX_TO_OFF(size);
				PROC_LOCK(p);
				if (p->p_state == PRS_NORMAL)
					racct_set(p, RACCT_RSS, rsize);
				ravailable = racct_get_available(p, RACCT_RSS);
				PROC_UNLOCK(p);
				if (rsize > ravailable) {
					/*
					 * Don't be overly aggressive; this
					 * might be an innocent process,
					 * and the limit could've been exceeded
					 * by some memory hog.  Don't try
					 * to deactivate more than 1/4th
					 * of process' resident set size.
					 */
					if (attempts <= 8) {
						if (ravailable < rsize -
						    (rsize / 4)) {
							ravailable = rsize -
							    (rsize / 4);
						}
					}
					vm_swapout_map_deactivate_pages(
					    &vm->vm_map,
					    OFF_TO_IDX(ravailable));
					/* Update RSS usage after paging out. */
					size = vmspace_resident_count(vm);
					rsize = IDX_TO_OFF(size);
					PROC_LOCK(p);
					if (p->p_state == PRS_NORMAL)
						racct_set(p, RACCT_RSS, rsize);
					PROC_UNLOCK(p);
					if (rsize > ravailable)
						tryagain = 1;
				}
			}
#endif
			vmspace_free(vm);
			sx_slock(&allproc_lock);
			PRELE(p);
		}
		sx_sunlock(&allproc_lock);
		if (tryagain != 0 && attempts <= 10) {
			maybe_yield();
			goto again;
		}
	}
}

/*
 * Allow a thread's kernel stack to be paged out.
 */
static void
vm_thread_swapout(struct thread *td)
{
	vm_page_t m;
	vm_offset_t kaddr;
	vm_pindex_t pindex;
	int i, pages;

	cpu_thread_swapout(td);
	kaddr = td->td_kstack;
	pages = td->td_kstack_pages;
	pindex = atop(kaddr - VM_MIN_KERNEL_ADDRESS);
	pmap_qremove(kaddr, pages);
	VM_OBJECT_WLOCK(kstack_object);
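	/*
	 * Dirty each stack page and release it to the laundry queue so
	 * that it can be written to swap and reclaimed.
	 */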
	for (i = 0; i < pages; i++) {
		m = vm_page_lookup(kstack_object, pindex + i);
		if (m == NULL)
			panic("vm_thread_swapout: kstack already missing?");
		vm_page_dirty(m);
		vm_page_xunbusy_unchecked(m);
		vm_page_unwire(m, PQ_LAUNDRY);
	}
	VM_OBJECT_WUNLOCK(kstack_object);
}

/*
 * Bring the kernel stack for a specified thread back in.
 */
static void
vm_thread_swapin(struct thread *td, int oom_alloc)
{
	vm_page_t ma[KSTACK_MAX_PAGES];
	vm_offset_t kaddr;
	int a, count, i, j, pages, rv;

	kaddr = td->td_kstack;
	pages = td->td_kstack_pages;
	vm_thread_stack_back(td->td_domain.dr_policy, kaddr, ma, pages,
	    oom_alloc);
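	/*
	 * Page in any stack pages that are not fully valid, clustering
	 * runs of consecutive invalid pages into single pager requests.
	 */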
	for (i = 0; i < pages;) {
		vm_page_assert_xbusied(ma[i]);
		if (vm_page_all_valid(ma[i])) {
			i++;
			continue;
		}
		vm_object_pip_add(kstack_object, 1);
		for (j = i + 1; j < pages; j++)
			if (vm_page_all_valid(ma[j]))
				break;
		VM_OBJECT_WLOCK(kstack_object);
		rv = vm_pager_has_page(kstack_object, ma[i]->pindex, NULL, &a);
		VM_OBJECT_WUNLOCK(kstack_object);
		KASSERT(rv == 1, ("%s: missing page %p", __func__, ma[i]));
		count = min(a + 1, j - i);
		rv = vm_pager_get_pages(kstack_object, ma + i, count, NULL, NULL);
		KASSERT(rv == VM_PAGER_OK, ("%s: cannot get kstack for proc %d",
		    __func__, td->td_proc->p_pid));
		vm_object_pip_wakeup(kstack_object);
		i += count;
	}
	pmap_qenter(kaddr, ma, pages);
	cpu_thread_swapin(td);
}

void
faultin(struct proc *p)
{
	struct thread *td;
	int oom_alloc;

	PROC_LOCK_ASSERT(p, MA_OWNED);

	/*
	 * If another process is swapping in this process,
	 * just wait until it finishes.
	 */
	if (p->p_flag & P_SWAPPINGIN) {
		while (p->p_flag & P_SWAPPINGIN)
			msleep(&p->p_flag, &p->p_mtx, PVM, "faultin", 0);
		return;
	}

	if ((p->p_flag & P_INMEM) == 0) {
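		/*
		 * Let a process that is being killed for lack of memory
		 * allocate its stack pages from the system reserve, so
		 * that it can exit and release memory sooner.
		 */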
		oom_alloc = (p->p_flag & P_WKILLED) != 0 ? VM_ALLOC_SYSTEM :
		    VM_ALLOC_NORMAL;

		/*
		 * Don't let another thread swap process p out while we are
		 * busy swapping it in.
		 */
		++p->p_lock;
		p->p_flag |= P_SWAPPINGIN;
		PROC_UNLOCK(p);
		sx_xlock(&allproc_lock);
		MPASS(swapped_cnt > 0);
		swapped_cnt--;
		if (curthread != &thread0)
			swap_inprogress++;
		sx_xunlock(&allproc_lock);

		/*
		 * We hold no lock here because the list of threads
		 * can not change while all threads in the process are
		 * swapped out.
		 */
		FOREACH_THREAD_IN_PROC(p, td)
			vm_thread_swapin(td, oom_alloc);

		if (curthread != &thread0) {
			sx_xlock(&allproc_lock);
			MPASS(swap_inprogress > 0);
			swap_inprogress--;
			last_swapin = ticks;
			sx_xunlock(&allproc_lock);
		}
		PROC_LOCK(p);
		swapclear(p);
		p->p_swtick = ticks;

		/* Allow other threads to swap p out now. */
		wakeup(&p->p_flag);
		--p->p_lock;
	}
}

/*
 * This swapin algorithm attempts to swap-in processes only if there
 * is enough space for them.  Of course, if a process waits for a long
 * time, it will be swapped in anyway.
 */

static struct proc *
swapper_selector(bool wkilled_only)
{
	struct proc *p, *res;
	struct thread *td;
	int ppri, pri, slptime, swtime;

	sx_assert(&allproc_lock, SA_SLOCKED);
	if (swapped_cnt == 0)
		return (NULL);
	res = NULL;
	ppri = INT_MIN;
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_state == PRS_NEW || (p->p_flag & (P_SWAPPINGOUT |
		    P_SWAPPINGIN | P_INMEM)) != 0) {
			PROC_UNLOCK(p);
			continue;
		}
		if (p->p_state == PRS_NORMAL && (p->p_flag & P_WKILLED) != 0) {
			/*
			 * A swapped-out process might have mapped a
			 * large portion of the system's pages as
			 * anonymous memory.  There is no other way to
			 * release the memory other than to kill the
			 * process, for which we need to swap it in.
			 */
			return (p);
		}
		if (wkilled_only) {
			PROC_UNLOCK(p);
			continue;
		}
		swtime = (ticks - p->p_swtick) / hz;
		FOREACH_THREAD_IN_PROC(p, td) {
			/*
			 * An otherwise runnable thread of a process
			 * swapped out has only the TDI_SWAPPED bit set.
			 */
			thread_lock(td);
			if (td->td_inhibitors == TDI_SWAPPED) {
				slptime = (ticks - td->td_slptick) / hz;
				pri = swtime + slptime;
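				/*
				 * Unless swap-in was explicitly
				 * requested, weight the priority by
				 * the process's nice value.
				 */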
				if ((td->td_flags & TDF_SWAPINREQ) == 0)
					pri -= p->p_nice * 8;
				/*
				 * if this thread is higher priority
				 * and there is enough space, then select
				 * this process instead of the previous
				 * selection.
				 */
				if (pri > ppri) {
					res = p;
					ppri = pri;
				}
			}
			thread_unlock(td);
		}
		PROC_UNLOCK(p);
	}

	if (res != NULL)
		PROC_LOCK(res);
	return (res);
}

#define	SWAPIN_INTERVAL	(MAXSLP * hz / 2)

/*
 * Limit swapper to swap in one non-WKILLED process in MAXSLP/2
 * interval, assuming that there is:
 * - at least one domain that is not suffering from a shortage of free memory;
 * - no parallel swap-ins;
 * - no other swap-ins in the current SWAPIN_INTERVAL.
 */
static bool
swapper_wkilled_only(void)
{

	return (vm_page_count_min_set(&all_domains) || swap_inprogress > 0 ||
	    (u_int)(ticks - last_swapin) < SWAPIN_INTERVAL);
}

void
swapper(void)
{
	struct proc *p;

	for (;;) {
		sx_slock(&allproc_lock);
		p = swapper_selector(swapper_wkilled_only());
		sx_sunlock(&allproc_lock);

		if (p == NULL) {
			tsleep(&proc0, PVM, "swapin", SWAPIN_INTERVAL);
		} else {
			PROC_LOCK_ASSERT(p, MA_OWNED);

			/*
			 * Another process may be bringing or may have
			 * already brought this process in while we
			 * traverse all threads.  Or, this process may
			 * have exited or even be swapped out again.
			 */
			if (p->p_state == PRS_NORMAL && (p->p_flag & (P_INMEM |
			    P_SWAPPINGOUT | P_SWAPPINGIN)) == 0) {
				faultin(p);
			}
			PROC_UNLOCK(p);
		}
	}
}

/*
 * First, if any processes have been sleeping or stopped for at least
 * "swap_idle_threshold1" seconds, they are swapped out.  If, however,
 * no such processes exist, then the longest-sleeping or stopped
 * process is swapped out.  Finally, and only as a last resort, if
 * there are no sleeping or stopped processes, the longest-resident
 * process is swapped out.
 */
static void
swapout_procs(int action)
{
	struct proc *p;
	struct thread *td;
	int slptime;
	bool didswap, doswap;

	MPASS((action & (VM_SWAP_NORMAL | VM_SWAP_IDLE)) != 0);

	didswap = false;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		/*
		 * Filter out not yet fully constructed processes.  Do
		 * not swap out held processes.  Avoid processes which
		 * are system, exiting, execing, traced, already swapped
		 * out or are in the process of being swapped in or out.
		 */
		PROC_LOCK(p);
		if (p->p_state != PRS_NORMAL || p->p_lock != 0 || (p->p_flag &
		    (P_SYSTEM | P_WEXIT | P_INEXEC | P_STOPPED_SINGLE |
		    P_TRACED | P_SWAPPINGOUT | P_SWAPPINGIN | P_INMEM)) !=
		    P_INMEM) {
			PROC_UNLOCK(p);
			continue;
		}

		/*
		 * Further consideration of this process for swap out
		 * requires iterating over its threads.  We release
		 * allproc_lock here so that process creation and
		 * destruction are not blocked while we iterate.
		 *
		 * To later reacquire allproc_lock and resume
		 * iteration over the allproc list, we will first have
		 * to release the lock on the process.  We place a
		 * hold on the process so that it remains in the
		 * allproc list while it is unlocked.
		 */
		_PHOLD_LITE(p);
		sx_sunlock(&allproc_lock);

		/*
		 * Do not swapout a realtime process.
		 * Guarantee swap_idle_threshold1 time in memory.
		 * If the system is under memory stress, or if we are
		 * swapping idle processes >= swap_idle_threshold2,
		 * then swap the process out.
		 */
		doswap = true;
		FOREACH_THREAD_IN_PROC(p, td) {
			thread_lock(td);
			slptime = (ticks - td->td_slptick) / hz;
			if (PRI_IS_REALTIME(td->td_pri_class) ||
			    slptime < swap_idle_threshold1 ||
			    !thread_safetoswapout(td) ||
			    ((action & VM_SWAP_NORMAL) == 0 &&
			    slptime < swap_idle_threshold2))
				doswap = false;
			thread_unlock(td);
			if (!doswap)
				break;
		}
		if (doswap && swapout(p) == 0)
			didswap = true;

		PROC_UNLOCK(p);
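		/*
		 * The swapped-process count is protected by the exclusive
		 * allproc lock; take it exclusively for the update, then
		 * downgrade to shared to continue the scan.
		 */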
		if (didswap) {
			sx_xlock(&allproc_lock);
			swapped_cnt++;
			sx_downgrade(&allproc_lock);
		} else
			sx_slock(&allproc_lock);
		PRELE(p);
	}
	sx_sunlock(&allproc_lock);

	/*
	 * If we swapped something out, and another process needed memory,
	 * then wake up the swapper process.
	 */
	if (didswap)
		wakeup(&proc0);
}

static void
swapclear(struct proc *p)
{
	struct thread *td;

	PROC_LOCK_ASSERT(p, MA_OWNED);

	FOREACH_THREAD_IN_PROC(p, td) {
		thread_lock(td);
		td->td_flags |= TDF_INMEM;
		td->td_flags &= ~TDF_SWAPINREQ;
		TD_CLR_SWAPPED(td);
		if (TD_CAN_RUN(td)) {
			if (setrunnable(td, 0)) {
#ifdef INVARIANTS
				/*
				 * XXX: We just cleared TDI_SWAPPED
				 * above and set TDF_INMEM, so this
				 * should never happen.
				 */
				panic("not waking up swapper");
#endif
			}
		} else
			thread_unlock(td);
	}
	p->p_flag &= ~(P_SWAPPINGIN | P_SWAPPINGOUT);
	p->p_flag |= P_INMEM;
}

static int
swapout(struct proc *p)
{
	struct thread *td;

	PROC_LOCK_ASSERT(p, MA_OWNED);

	/*
	 * The states of this process and its threads may have changed
	 * by now.  Assuming that there is only one pageout daemon thread,
	 * this process should still be in memory.
	 */
	KASSERT((p->p_flag & (P_INMEM | P_SWAPPINGOUT | P_SWAPPINGIN)) ==
	    P_INMEM, ("swapout: lost a swapout race?"));

	/*
	 * Remember the resident count.
	 */
	p->p_vmspace->vm_swrss = vmspace_resident_count(p->p_vmspace);

	/*
	 * Check and mark all threads before we proceed.
	 */
	p->p_flag &= ~P_INMEM;
	p->p_flag |= P_SWAPPINGOUT;
	FOREACH_THREAD_IN_PROC(p, td) {
		thread_lock(td);
		if (!thread_safetoswapout(td)) {
			thread_unlock(td);
			swapclear(p);
			return (EBUSY);
		}
		td->td_flags &= ~TDF_INMEM;
		TD_SET_SWAPPED(td);
		thread_unlock(td);
	}
	td = FIRST_THREAD_IN_PROC(p);
	++td->td_ru.ru_nswap;
	PROC_UNLOCK(p);

	/*
	 * This list is stable because all threads are now prevented from
	 * running.  The list is only modified in the context of a running
	 * thread in this process.
	 */
	FOREACH_THREAD_IN_PROC(p, td)
		vm_thread_swapout(td);

	PROC_LOCK(p);
	p->p_flag &= ~P_SWAPPINGOUT;
	p->p_swtick = ticks;
	return (0);
}