vm_swapout.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 1991 Regents of the University of California.
    3  * All rights reserved.
    4  * Copyright (c) 1994 John S. Dyson
    5  * All rights reserved.
    6  * Copyright (c) 1994 David Greenman
    7  * All rights reserved.
    8  * Copyright (c) 2005 Yahoo! Technologies Norway AS
    9  * All rights reserved.
   10  *
   11  * This code is derived from software contributed to Berkeley by
   12  * The Mach Operating System project at Carnegie-Mellon University.
   13  *
   14  * Redistribution and use in source and binary forms, with or without
   15  * modification, are permitted provided that the following conditions
   16  * are met:
   17  * 1. Redistributions of source code must retain the above copyright
   18  *    notice, this list of conditions and the following disclaimer.
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  * 3. All advertising materials mentioning features or use of this software
   23  *    must display the following acknowledgement:
   24  *      This product includes software developed by the University of
   25  *      California, Berkeley and its contributors.
   26  * 4. Neither the name of the University nor the names of its contributors
   27  *    may be used to endorse or promote products derived from this software
   28  *    without specific prior written permission.
   29  *
   30  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   31  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   32  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   33  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   34  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   35  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   36  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   37  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   38  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   39  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   40  * SUCH DAMAGE.
   41  *
   42  *      from: @(#)vm_pageout.c  7.4 (Berkeley) 5/7/91
   43  *
   44  *
   45  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   46  * All rights reserved.
   47  *
   48  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
   49  *
   50  * Permission to use, copy, modify and distribute this software and
   51  * its documentation is hereby granted, provided that both the copyright
   52  * notice and this permission notice appear in all copies of the
   53  * software, derivative works or modified versions, and any portions
   54  * thereof, and that both notices appear in supporting documentation.
   55  *
   56  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   57  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   58  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   59  *
   60  * Carnegie Mellon requests users of this software to return to
   61  *
   62  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   63  *  School of Computer Science
   64  *  Carnegie Mellon University
   65  *  Pittsburgh PA 15213-3890
   66  *
   67  * any improvements or extensions that they make and grant Carnegie the
   68  * rights to redistribute these changes.
   69  */
   70 
   71 #include <sys/cdefs.h>
   72 __FBSDID("$FreeBSD$");
   73 
   74 #include "opt_kstack_pages.h"
   75 #include "opt_kstack_max_pages.h"
   76 #include "opt_vm.h"
   77 
   78 #include <sys/param.h>
   79 #include <sys/systm.h>
   80 #include <sys/limits.h>
   81 #include <sys/kernel.h>
   82 #include <sys/eventhandler.h>
   83 #include <sys/lock.h>
   84 #include <sys/mutex.h>
   85 #include <sys/proc.h>
   86 #include <sys/_kstack_cache.h>
   87 #include <sys/kthread.h>
   88 #include <sys/ktr.h>
   89 #include <sys/mount.h>
   90 #include <sys/racct.h>
   91 #include <sys/resourcevar.h>
   92 #include <sys/sched.h>
   93 #include <sys/sdt.h>
   94 #include <sys/signalvar.h>
   95 #include <sys/smp.h>
   96 #include <sys/time.h>
   97 #include <sys/vnode.h>
   98 #include <sys/vmmeter.h>
   99 #include <sys/rwlock.h>
  100 #include <sys/sx.h>
  101 #include <sys/sysctl.h>
  102 
  103 #include <vm/vm.h>
  104 #include <vm/vm_param.h>
  105 #include <vm/vm_object.h>
  106 #include <vm/vm_page.h>
  107 #include <vm/vm_map.h>
  108 #include <vm/vm_pageout.h>
  109 #include <vm/vm_pager.h>
  110 #include <vm/vm_phys.h>
  111 #include <vm/swap_pager.h>
  112 #include <vm/vm_extern.h>
  113 #include <vm/uma.h>
  114 
  115 /*
       * The kernel process "vm_daemon".
       *
       * vm_daemon() is defined later in this file.  The address of vmproc is
       * placed in the descriptor below (presumably so the created process can
       * be recorded there -- confirm against struct kproc_desc).
       */
  116 static void vm_daemon(void);
  117 static struct proc *vmproc;
  118 
      /*
       * Descriptor handed to kproc_start() by the SYSINIT below: a name
       * string, the daemon function, and &vmproc.
       */
  119 static struct kproc_desc vm_kp = {
  120         "vmdaemon",
  121         vm_daemon,
  122         &vmproc
  123 };
      /* Registers kproc_start with &vm_kp at SI_SUB_KTHREAD_VM, SI_ORDER_FIRST. */
  124 SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp);
  125 
      /*
       * Tunables exported read-write under the _vm sysctl node.
       *
       * vm_swap_enabled gates vm_swapout_run(); vm_swap_idle_enabled gates
       * vm_swapout_run_idle().  Both are tested before any request is posted
       * to the vm daemon.
       */
  126 static int vm_swap_enabled = 1;
  127 static int vm_swap_idle_enabled = 0;
  128 
  129 SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RW,
  130     &vm_swap_enabled, 0,
  131     "Enable entire process swapout");
  132 SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, CTLFLAG_RW,
  133     &vm_swap_idle_enabled, 0,
  134     "Allow swapout on idle criteria");
  135 
  136 /*
  137  * Swap_idle_threshold1 is the guaranteed swapped in time for a process
       * (default 2).  It is not read by any function visible in this part of
       * the file.
  138  */
  139 static int swap_idle_threshold1 = 2;
  140 SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold1, CTLFLAG_RW,
  141     &swap_idle_threshold1, 0,
  142     "Guaranteed swapped in time for a process");
  143 
  144 /*
  145  * Swap_idle_threshold2 is the time that a process can be idle before
  146  * it will be swapped out, if idle swapping is enabled (default 10).
  147  */
  148 static int swap_idle_threshold2 = 10;
  149 SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2, CTLFLAG_RW,
  150     &swap_idle_threshold2, 0,
  151     "Time before a process will be swapped out");
  152 
      /*
       * State shared between vm_req_vmdaemon() and vm_daemon(), protected by
       * vm_daemon_mtx:
       *
       *  vm_pageout_req_swapout  VM_SWAP_* request bits; ORed in by
       *                          vm_req_vmdaemon(), read and cleared by
       *                          vm_daemon().
       *  vm_daemon_needed        only its address is used in the code shown
       *                          here, as the sleep/wakeup channel for the
       *                          daemon.
       */
  153 static int vm_pageout_req_swapout;      /* XXX */
  154 static int vm_daemon_needed;
  155 static struct mtx vm_daemon_mtx;
  156 /* Allow for use by vm_pageout before vm_daemon is initialized. */
  157 MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF);
  158 
      /*
       * Swap-in bookkeeping.  faultin() updates all three while holding
       * allproc_lock exclusively:
       *
       *  swapped_cnt      decremented when a swap-in starts; swapper_selector()
       *                   returns early when it is zero.  The increment is not
       *                   in the part of the file shown here.
       *  swap_inprogress  swap-ins currently running in a thread other than
       *                   thread0.
       *  last_swapin      value of ticks when the most recent such swap-in
       *                   finished.
       */
  159 static int swapped_cnt;
  160 static int swap_inprogress;     /* Pending swap-ins done outside swapper. */
  161 static int last_swapin;
  162 
      /* Forward declarations for file-local functions. */
  163 static void swapclear(struct proc *);
  164 static int swapout(struct proc *);
  165 static void vm_swapout_map_deactivate_pages(vm_map_t, long);
  166 static void vm_swapout_object_deactivate_pages(pmap_t, vm_object_t, long);
  167 static void swapout_procs(int action);
  168 static void vm_req_vmdaemon(int req);
  169 static void vm_thread_swapout(struct thread *td);
  170 
  171 /*
  172  *      vm_swapout_object_deactivate_pages
  173  *
  174  *      Deactivate enough pages to satisfy the inactive target
  175  *      requirements.
  176  *
  177  *      The object and map must be locked.
       *
       *      pmap            pmap whose resident page count is being reduced
       *      first_object    object to start from; its lock is asserted on
       *                      entry and is never released by this function
       *      desired         the scan stops once
       *                      pmap_resident_count(pmap) <= desired
       *
       *      The scan starts at first_object and follows the backing_object
       *      chain.  Each backing object is read-locked before the lock on
       *      the previous link is dropped, and every lock acquired here is
       *      released before returning.  Nothing is done for an
       *      OBJ_FICTITIOUS first_object.
  178  */
  179 static void
  180 vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
  181     long desired)
  182 {
  183         vm_object_t backing_object, object;
  184         vm_page_t p;
  185         int act_delta, remove_mode;
  186 
  187         VM_OBJECT_ASSERT_LOCKED(first_object);
  188         if ((first_object->flags & OBJ_FICTITIOUS) != 0)
  189                 return;
  190         for (object = first_object;; object = backing_object) {
                      /* Target already met: nothing more to do. */
  191                 if (pmap_resident_count(pmap) <= desired)
  192                         goto unlock_return;
  193                 VM_OBJECT_ASSERT_LOCKED(object);
                      /* Stop at an unmanaged object or one with paging active. */
  194                 if ((object->flags & OBJ_UNMANAGED) != 0 ||
  195                     object->paging_in_progress != 0)
  196                         goto unlock_return;
  197 
                      /*
                       * With more than one shadow, remove_mode is set; in
                       * that mode an active page whose act_count decays to
                       * zero is requeued below instead of being unmapped
                       * and deactivated.
                       */
  198                 remove_mode = 0;
  199                 if (object->shadow_count > 1)
  200                         remove_mode = 1;
  201                 /*
  202                  * Scan the object's entire memory queue.
  203                  */
  204                 TAILQ_FOREACH(p, &object->memq, listq) {
  205                         if (pmap_resident_count(pmap) <= desired)
  206                                 goto unlock_return;
                              /* Abandon the whole scan when asked to yield. */
  207                         if (should_yield())
  208                                 goto unlock_return;
  209                         if (vm_page_busied(p))
  210                                 continue;
  211                         PCPU_INC(cnt.v_pdpages);
  212                         vm_page_lock(p);
                              /*
                               * Skip pages that are wired or held, and pages
                               * that pmap_page_exists_quick() does not find
                               * in this pmap.
                               */
  213                         if (p->wire_count != 0 || p->hold_count != 0 ||
  214                             !pmap_page_exists_quick(pmap, p)) {
  215                                 vm_page_unlock(p);
  216                                 continue;
  217                         }
                              /*
                               * act_delta is the pmap_ts_referenced() result,
                               * raised to at least 1 when PGA_REFERENCED was
                               * set; that flag is cleared here.
                               */
  218                         act_delta = pmap_ts_referenced(p);
  219                         if ((p->aflags & PGA_REFERENCED) != 0) {
  220                                 if (act_delta == 0)
  221                                         act_delta = 1;
  222                                 vm_page_aflag_clear(p, PGA_REFERENCED);
  223                         }
                              /*
                               * Four cases follow:
                               *  - not active, referenced: activate and
                               *    credit act_count with act_delta;
                               *  - active, unreferenced: decay act_count by
                               *    up to ACT_DECLINE, then either unmap and
                               *    deactivate or requeue;
                               *  - active, referenced: add ACT_ADVANCE while
                               *    below ACT_MAX - ACT_ADVANCE, and requeue;
                               *  - inactive, unreferenced: remove all of the
                               *    page's mappings.
                               */
  224                         if (!vm_page_active(p) && act_delta != 0) {
  225                                 vm_page_activate(p);
  226                                 p->act_count += act_delta;
  227                         } else if (vm_page_active(p)) {
  228                                 if (act_delta == 0) {
  229                                         p->act_count -= min(p->act_count,
  230                                             ACT_DECLINE);
  231                                         if (!remove_mode && p->act_count == 0) {
  232                                                 pmap_remove_all(p);
  233                                                 vm_page_deactivate(p);
  234                                         } else
  235                                                 vm_page_requeue(p);
  236                                 } else {
  237                                         vm_page_activate(p);
  238                                         if (p->act_count < ACT_MAX -
  239                                             ACT_ADVANCE)
  240                                                 p->act_count += ACT_ADVANCE;
  241                                         vm_page_requeue(p);
  242                                 }
  243                         } else if (vm_page_inactive(p))
  244                                 pmap_remove_all(p);
  245                         vm_page_unlock(p);
  246                 }
                      /*
                       * Step to the backing object: take its read lock first,
                       * then drop the lock on the current object unless it is
                       * first_object, whose lock is not released here.
                       */
  247                 if ((backing_object = object->backing_object) == NULL)
  248                         goto unlock_return;
  249                 VM_OBJECT_RLOCK(backing_object);
  250                 if (object != first_object)
  251                         VM_OBJECT_RUNLOCK(object);
  252         }
  253 unlock_return:
              /* Release the one lock this function may still hold. */
  254         if (object != first_object)
  255                 VM_OBJECT_RUNLOCK(object);
  256 }
  257 
  258 /*
  259  * deactivate some number of pages in a map, try to do it fairly, but
  260  * that is really hard to do.
       *
       * map      address map to scan; only a try-lock for reading is attempted
       *          and the function returns without doing anything if that fails
       * desired  resident page count to aim for in the map's pmap
       *
       * When desired is 0 and no map entry has a non-zero wired_count, all
       * mappings between vm_map_min(map) and vm_map_max(map) are removed from
       * the pmap at the end.
  261  */
  262 static void
  263 vm_swapout_map_deactivate_pages(vm_map_t map, long desired)
  264 {
  265         vm_map_entry_t tmpe;
  266         vm_object_t obj, bigobj;
  267         int nothingwired;
  268 
  269         if (!vm_map_trylock_read(map))
  270                 return;
  271 
  272         bigobj = NULL;
  273         nothingwired = TRUE;
  274 
  275         /*
  276          * first, search out the biggest object, and try to free pages from
  277          * that.
              *
              * Only non-submap entries whose object can be read-locked without
              * blocking and has shadow_count <= 1 are candidates.  The read
              * lock on the current best candidate (bigobj) is kept; locks on
              * rejected or superseded objects are dropped immediately.  The
              * same pass records whether any entry is wired.
  278          */
  279         tmpe = map->header.next;
  280         while (tmpe != &map->header) {
  281                 if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
  282                         obj = tmpe->object.vm_object;
  283                         if (obj != NULL && VM_OBJECT_TRYRLOCK(obj)) {
  284                                 if (obj->shadow_count <= 1 &&
  285                                     (bigobj == NULL ||
  286                                      bigobj->resident_page_count <
  287                                      obj->resident_page_count)) {
  288                                         if (bigobj != NULL)
  289                                                 VM_OBJECT_RUNLOCK(bigobj);
  290                                         bigobj = obj;
  291                                 } else
  292                                         VM_OBJECT_RUNLOCK(obj);
  293                         }
  294                 }
  295                 if (tmpe->wired_count > 0)
  296                         nothingwired = FALSE;
  297                 tmpe = tmpe->next;
  298         }
  299 
              /* bigobj, if any, is still read-locked from the search above. */
  300         if (bigobj != NULL) {
  301                 vm_swapout_object_deactivate_pages(map->pmap, bigobj, desired);
  302                 VM_OBJECT_RUNLOCK(bigobj);
  303         }
  304         /*
  305          * Next, hunt around for other pages to deactivate.  We actually
  306          * do this search sort of wrong -- .text first is not the best idea.
              *
              * Unlike the first pass, this one uses VM_OBJECT_RLOCK() rather
              * than the try-lock, and it stops as soon as the resident count
              * is at or below desired.
  307          */
  308         tmpe = map->header.next;
  309         while (tmpe != &map->header) {
  310                 if (pmap_resident_count(vm_map_pmap(map)) <= desired)
  311                         break;
  312                 if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
  313                         obj = tmpe->object.vm_object;
  314                         if (obj != NULL) {
  315                                 VM_OBJECT_RLOCK(obj);
  316                                 vm_swapout_object_deactivate_pages(map->pmap,
  317                                     obj, desired);
  318                                 VM_OBJECT_RUNLOCK(obj);
  319                         }
  320                 }
  321                 tmpe = tmpe->next;
  322         }
  323 
  324         /*
  325          * Remove all mappings if a process is swapped out, this will free page
  326          * table pages.
  327          */
  328         if (desired == 0 && nothingwired) {
  329                 pmap_remove(vm_map_pmap(map), vm_map_min(map),
  330                     vm_map_max(map));
  331         }
  332 
  333         vm_map_unlock_read(map);
  334 }
  335 
  336 /*
  337  * Swap out requests
       *
       * Distinct bit values: vm_req_vmdaemon() ORs them into
       * vm_pageout_req_swapout, and vm_daemon() passes the accumulated mask
       * to swapout_procs().
  338  */
  339 #define VM_SWAP_NORMAL 1
  340 #define VM_SWAP_IDLE 2
  341 
      /*
       * Post a VM_SWAP_NORMAL request to the vm daemon.  Does nothing when
       * vm_swap_enabled is zero.
       */
  342 void
  343 vm_swapout_run(void)
  344 {
  345 
  346         if (vm_swap_enabled)
  347                 vm_req_vmdaemon(VM_SWAP_NORMAL);
  348 }
  349 
  350 /*
  351  * Idle process swapout -- run once per second when pagedaemons are
  352  * reclaiming pages.
       *
       * Posts a VM_SWAP_IDLE request to the vm daemon unless
       * vm_swap_idle_enabled is zero or a request was already posted while
       * time_second had its current value.
  353  */
  354 void
  355 vm_swapout_run_idle(void)
  356 {
              /* Value of time_second when the last request was posted. */
  357         static long lsec;
  358 
  359         if (!vm_swap_idle_enabled || time_second == lsec)
  360                 return;
  361         vm_req_vmdaemon(VM_SWAP_IDLE);
  362         lsec = time_second;
  363 }
  364 
      /*
       * Record a swapout request for the vm daemon and, if enough time has
       * passed, wake it up.
       *
       * req is ORed into vm_pageout_req_swapout under vm_daemon_mtx.  The
       * wakeup on &vm_daemon_needed is issued only when ticks is more than
       * hz past the previous wakeup, or is below it; otherwise the request
       * bits simply stay set until vm_daemon() next reads them.
       */
  365 static void
  366 vm_req_vmdaemon(int req)
  367 {
              /* Value of ticks at the last wakeup issued from here. */
  368         static int lastrun = 0;
  369 
  370         mtx_lock(&vm_daemon_mtx);
  371         vm_pageout_req_swapout |= req;
              /*
               * NOTE(review): the "ticks < lastrun" arm presumably handles the
               * tick counter wrapping around -- confirm.
               */
  372         if ((ticks > (lastrun + hz)) || (ticks < lastrun)) {
  373                 wakeup(&vm_daemon_needed);
  374                 lastrun = ticks;
  375         }
  376         mtx_unlock(&vm_daemon_mtx);
  377 }
  378 
      /*
       * Body of the "vmdaemon" kernel process; loops forever.
       *
       * Each iteration:
       *  1. sleeps on &vm_daemon_needed, passing hz as the msleep() timeout
       *     when RACCT is compiled in and racct_enable is set, and 0
       *     otherwise;
       *  2. takes and clears the request bits posted by vm_req_vmdaemon()
       *     and, if any were set, calls swapout_procs() with them;
       *  3. walks all processes and calls
       *     vm_swapout_map_deactivate_pages() on those whose resident page
       *     count is at or above their RLIMIT_RSS-derived limit (the limit
       *     is forced to 0 for processes without P_INMEM);
       *  4. with RACCT, also publishes each process's RSS and trims
       *     processes that exceed the available RACCT_RSS amount, rescanning
       *     while any process is still over.
       */
  379 static void
  380 vm_daemon(void)
  381 {
  382         struct rlimit rsslim;
  383         struct proc *p;
  384         struct thread *td;
  385         struct vmspace *vm;
  386         int breakout, swapout_flags, tryagain, attempts;
  387 #ifdef RACCT
  388         uint64_t rsize, ravailable;
  389 #endif
  390 
  391         while (TRUE) {
  392                 mtx_lock(&vm_daemon_mtx);
  393                 msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep",
  394 #ifdef RACCT
  395                     racct_enable ? hz : 0
  396 #else
  397                     0
  398 #endif
  399                 );
                      /* Consume the pending request bits under the mutex. */
  400                 swapout_flags = vm_pageout_req_swapout;
  401                 vm_pageout_req_swapout = 0;
  402                 mtx_unlock(&vm_daemon_mtx);
  403                 if (swapout_flags)
  404                         swapout_procs(swapout_flags);
  405 
  406                 /*
  407                  * scan the processes for exceeding their rlimits or if
  408                  * process is swapped out -- deactivate pages
  409                  */
  410                 tryagain = 0;
  411                 attempts = 0;
                      /*
                       * NOTE(review): tryagain is not cleared at "again", so
                       * once a pass sets it the scan repeats until attempts
                       * exceeds 10, even if a later pass finds nothing over
                       * the limit.
                       */
  412 again:
  413                 attempts++;
  414                 sx_slock(&allproc_lock);
  415                 FOREACH_PROC_IN_SYSTEM(p) {
  416                         vm_pindex_t limit, size;
  417 
  418                         /*
  419                          * if this is a system process or if we have already
  420                          * looked at this process, skip it.
  421                          */
  422                         PROC_LOCK(p);
  423                         if (p->p_state != PRS_NORMAL ||
  424                             p->p_flag & (P_INEXEC | P_SYSTEM | P_WEXIT)) {
  425                                 PROC_UNLOCK(p);
  426                                 continue;
  427                         }
  428                         /*
  429                          * if the process is in a non-running type state,
  430                          * don't touch it.
                               *
                               * The process is skipped as soon as one thread is
                               * found that is not on a run queue, running,
                               * sleeping or suspended.
  431                          */
  432                         breakout = 0;
  433                         FOREACH_THREAD_IN_PROC(p, td) {
  434                                 thread_lock(td);
  435                                 if (!TD_ON_RUNQ(td) &&
  436                                     !TD_IS_RUNNING(td) &&
  437                                     !TD_IS_SLEEPING(td) &&
  438                                     !TD_IS_SUSPENDED(td)) {
  439                                         thread_unlock(td);
  440                                         breakout = 1;
  441                                         break;
  442                                 }
  443                                 thread_unlock(td);
  444                         }
  445                         if (breakout) {
  446                                 PROC_UNLOCK(p);
  447                                 continue;
  448                         }
  449                         /*
  450                          * get a limit
                               *
                               * The smaller of the current and maximum
                               * RLIMIT_RSS values, converted with OFF_TO_IDX().
  451                          */
  452                         lim_rlimit_proc(p, RLIMIT_RSS, &rsslim);
  453                         limit = OFF_TO_IDX(
  454                             qmin(rsslim.rlim_cur, rsslim.rlim_max));
  455 
  456                         /*
  457                          * let processes that are swapped out really be
  458                          * swapped out set the limit to nothing (will force a
  459                          * swap-out.)
  460                          */
  461                         if ((p->p_flag & P_INMEM) == 0)
  462                                 limit = 0;      /* XXX */
                              /*
                               * Take a vmspace reference and a hold on the
                               * process before dropping the proc lock.  The
                               * hold is released with PRELE() on every path
                               * below.
                               */
  463                         vm = vmspace_acquire_ref(p);
  464                         _PHOLD_LITE(p);
  465                         PROC_UNLOCK(p);
  466                         if (vm == NULL) {
  467                                 PRELE(p);
  468                                 continue;
  469                         }
                              /*
                               * allproc_lock is dropped for the page work and
                               * re-acquired before PRELE(p) at the bottom of
                               * the loop body.
                               */
  470                         sx_sunlock(&allproc_lock);
  471 
  472                         size = vmspace_resident_count(vm);
  473                         if (size >= limit) {
  474                                 vm_swapout_map_deactivate_pages(
  475                                     &vm->vm_map, limit);
  476                                 size = vmspace_resident_count(vm);
  477                         }
  478 #ifdef RACCT
  479                         if (racct_enable) {
                                      /*
                                       * Publish the resident size in bytes
                                       * (only while the process is still
                                       * PRS_NORMAL) and read back how much
                                       * RACCT_RSS is available to it.
                                       */
  480                                 rsize = IDX_TO_OFF(size);
  481                                 PROC_LOCK(p);
  482                                 if (p->p_state == PRS_NORMAL)
  483                                         racct_set(p, RACCT_RSS, rsize);
  484                                 ravailable = racct_get_available(p, RACCT_RSS);
  485                                 PROC_UNLOCK(p);
  486                                 if (rsize > ravailable) {
  487                                         /*
  488                                          * Don't be overly aggressive; this
  489                                          * might be an innocent process,
  490                                          * and the limit could've been exceeded
  491                                          * by some memory hog.  Don't try
  492                                          * to deactivate more than 1/4th
  493                                          * of process' resident set size.
                                               *
                                               * The clamp is applied only on
                                               * the first 8 attempts; after
                                               * that ravailable is used as is.
  494                                          */
  495                                         if (attempts <= 8) {
  496                                                 if (ravailable < rsize -
  497                                                     (rsize / 4)) {
  498                                                         ravailable = rsize -
  499                                                             (rsize / 4);
  500                                                 }
  501                                         }
  502                                         vm_swapout_map_deactivate_pages(
  503                                             &vm->vm_map,
  504                                             OFF_TO_IDX(ravailable));
  505                                         /* Update RSS usage after paging out. */
  506                                         size = vmspace_resident_count(vm);
  507                                         rsize = IDX_TO_OFF(size);
  508                                         PROC_LOCK(p);
  509                                         if (p->p_state == PRS_NORMAL)
  510                                                 racct_set(p, RACCT_RSS, rsize);
  511                                         PROC_UNLOCK(p);
                                              /* Still over: request another pass. */
  512                                         if (rsize > ravailable)
  513                                                 tryagain = 1;
  514                                 }
  515                         }
  516 #endif
  517                         vmspace_free(vm);
  518                         sx_slock(&allproc_lock);
  519                         PRELE(p);
  520                 }
  521                 sx_sunlock(&allproc_lock);
                      /*
                       * attempts was already incremented for the pass just
                       * finished, so this test allows up to 11 passes per
                       * wakeup.
                       */
  522                 if (tryagain != 0 && attempts <= 10) {
  523                         maybe_yield();
  524                         goto again;
  525                 }
  526         }
  527 }
  528 
  529 /*
  530  * Allow a thread's kernel stack to be paged out.
       *
       * After cpu_thread_swapout(td), the stack's td_kstack_pages mappings at
       * td->td_kstack are removed with pmap_qremove().  Then, with
       * td_kstack_obj write-locked, each stack page is looked up by index,
       * marked dirty and unwired to PQ_INACTIVE.  Panics if a stack page is
       * missing from the object.
  531  */
  532 static void
  533 vm_thread_swapout(struct thread *td)
  534 {
  535         vm_object_t ksobj;
  536         vm_page_t m;
  537         int i, pages;
  538 
  539         cpu_thread_swapout(td);
  540         pages = td->td_kstack_pages;
  541         ksobj = td->td_kstack_obj;
  542         pmap_qremove(td->td_kstack, pages);
  543         VM_OBJECT_WLOCK(ksobj);
  544         for (i = 0; i < pages; i++) {
  545                 m = vm_page_lookup(ksobj, i);
  546                 if (m == NULL)
  547                         panic("vm_thread_swapout: kstack already missing?");
                      /* Dirty the page before unwiring it under its page lock. */
  548                 vm_page_dirty(m);
  549                 vm_page_lock(m);
  550                 vm_page_unwire(m, PQ_INACTIVE);
  551                 vm_page_unlock(m);
  552         }
  553         VM_OBJECT_WUNLOCK(ksobj);
  554 }
  555 
  556 /*
  557  * Bring the kernel stack for a specified thread back in.
       *
       * td         thread whose stack (td_kstack_pages pages of td_kstack_obj)
       *            is brought in
       * oom_alloc  flag ORed with VM_ALLOC_WIRED for vm_page_grab_pages();
       *            faultin() passes VM_ALLOC_SYSTEM or VM_ALLOC_NORMAL
       *
       * All stack pages are grabbed wired; pages that are not fully valid are
       * read through the pager in runs.  The pages are then mapped at
       * td->td_kstack with pmap_qenter() and cpu_thread_swapin() is called.
  558  */
  559 static void
  560 vm_thread_swapin(struct thread *td, int oom_alloc)
  561 {
  562         vm_object_t ksobj;
              /* Sized for the largest stack; only the first "pages" are used. */
  563         vm_page_t ma[KSTACK_MAX_PAGES];
  564         int a, count, i, j, pages, rv;
  565 
  566         pages = td->td_kstack_pages;
  567         ksobj = td->td_kstack_obj;
  568         VM_OBJECT_WLOCK(ksobj);
  569         (void)vm_page_grab_pages(ksobj, 0, oom_alloc | VM_ALLOC_WIRED, ma,
  570             pages);
              /* i advances inside the body: by 1, or by the size of a read. */
  571         for (i = 0; i < pages;) {
  572                 vm_page_assert_xbusied(ma[i]);
                      /* Already fully valid: just release the busy state. */
  573                 if (ma[i]->valid == VM_PAGE_BITS_ALL) {
  574                         vm_page_xunbusy(ma[i]);
  575                         i++;
  576                         continue;
  577                 }
  578                 vm_object_pip_add(ksobj, 1);
                      /* Find j, the end of the run of not-fully-valid pages. */
  579                 for (j = i + 1; j < pages; j++)
  580                         if (ma[j]->valid == VM_PAGE_BITS_ALL)
  581                                 break;
                      /*
                       * The read covers at most a + 1 pages and never goes
                       * past the run.  (a is presumably the number of pages
                       * the pager can supply after ma[i] -- confirm against
                       * vm_pager_has_page().)
                       */
  582                 rv = vm_pager_has_page(ksobj, ma[i]->pindex, NULL, &a);
  583                 KASSERT(rv == 1, ("%s: missing page %p", __func__, ma[i]));
  584                 count = min(a + 1, j - i);
  585                 rv = vm_pager_get_pages(ksobj, ma + i, count, NULL, NULL);
  586                 KASSERT(rv == VM_PAGER_OK, ("%s: cannot get kstack for proc %d",
  587                     __func__, td->td_proc->p_pid));
  588                 vm_object_pip_wakeup(ksobj);
  589                 for (j = i; j < i + count; j++)
  590                         vm_page_xunbusy(ma[j]);
  591                 i += count;
  592         }
  593         VM_OBJECT_WUNLOCK(ksobj);
  594         pmap_qenter(td->td_kstack, ma, pages);
  595         cpu_thread_swapin(td);
  596 }
  597 
      /*
       * Make sure process p's thread kernel stacks are resident.
       *
       * Must be called with the proc lock held (asserted); the lock is held
       * again on return, although it is dropped while the stacks are being
       * brought in or while sleeping for another swap-in to finish.
       *
       *  - If P_SWAPPINGIN is set, sleep on &p->p_flag until it clears, then
       *    return.
       *  - Otherwise, if P_INMEM is clear, mark the process P_SWAPPINGIN,
       *    call vm_thread_swapin() for every thread, then call swapclear()
       *    and wake sleepers on &p->p_flag.
       *  - If P_INMEM is already set, nothing is done.
       */
  598 void
  599 faultin(struct proc *p)
  600 {
  601         struct thread *td;
  602         int oom_alloc;
  603 
  604         PROC_LOCK_ASSERT(p, MA_OWNED);
  605 
  606         /*
  607          * If another process is swapping in this process,
  608          * just wait until it finishes.
  609          */
  610         if (p->p_flag & P_SWAPPINGIN) {
  611                 while (p->p_flag & P_SWAPPINGIN)
  612                         msleep(&p->p_flag, &p->p_mtx, PVM, "faultin", 0);
  613                 return;
  614         }
  615 
  616         if ((p->p_flag & P_INMEM) == 0) {
                      /* A process marked P_WKILLED uses VM_ALLOC_SYSTEM. */
  617                 oom_alloc = (p->p_flag & P_WKILLED) != 0 ? VM_ALLOC_SYSTEM :
  618                     VM_ALLOC_NORMAL;
  619 
  620                 /*
  621                  * Don't let another thread swap process p out while we are
  622                  * busy swapping it in.
  623                  */
  624                 ++p->p_lock;
  625                 p->p_flag |= P_SWAPPINGIN;
  626                 PROC_UNLOCK(p);
                      /*
                       * The swap counters are updated under the exclusive
                       * allproc_lock.  swap_inprogress is only counted when
                       * the caller is not thread0.
                       */
  627                 sx_xlock(&allproc_lock);
  628                 MPASS(swapped_cnt > 0);
  629                 swapped_cnt--;
  630                 if (curthread != &thread0)
  631                         swap_inprogress++;
  632                 sx_xunlock(&allproc_lock);
  633 
  634                 /*
  635                  * We hold no lock here because the list of threads
  636                  * can not change while all threads in the process are
  637                  * swapped out.
  638                  */
  639                 FOREACH_THREAD_IN_PROC(p, td)
  640                         vm_thread_swapin(td, oom_alloc);
  641 
                      /* Undo the in-progress count and record when we finished. */
  642                 if (curthread != &thread0) {
  643                         sx_xlock(&allproc_lock);
  644                         MPASS(swap_inprogress > 0);
  645                         swap_inprogress--;
  646                         last_swapin = ticks;
  647                         sx_xunlock(&allproc_lock);
  648                 }
  649                 PROC_LOCK(p);
                      /*
                       * swapclear() is defined outside the part of the file
                       * shown here; presumably it clears P_SWAPPINGIN and
                       * marks the process resident -- confirm.
                       */
  650                 swapclear(p);
  651                 p->p_swtick = ticks;
  652 
  653                 /* Allow other threads to swap p out now. */
  654                 wakeup(&p->p_flag);
  655                 --p->p_lock;
  656         }
  657 }
  658 
  659 /*
  660  * This swapin algorithm attempts to swap-in processes only if there
  661  * is enough space for them.  Of course, if a process waits for a long
  662  * time, it will be swapped in anyway.
       *
       * swapper_selector() picks the next process to swap in.
       *
       * wkilled_only  when true, only a PRS_NORMAL process with P_WKILLED set
       *               can be returned
       *
       * The caller must hold allproc_lock shared (asserted).  Returns NULL if
       * swapped_cnt is zero or no candidate is found; otherwise returns the
       * chosen process with its proc lock held.  For a process chosen by
       * priority, the proc lock was dropped during the scan and is re-taken
       * just before returning.
  663  */
  664 
  665 static struct proc *
  666 swapper_selector(bool wkilled_only)
  667 {
  668         struct proc *p, *res;
  669         struct thread *td;
  670         int ppri, pri, slptime, swtime;
  671 
  672         sx_assert(&allproc_lock, SA_SLOCKED);
  673         if (swapped_cnt == 0)
  674                 return (NULL);
  675         res = NULL;
  676         ppri = INT_MIN;
  677         FOREACH_PROC_IN_SYSTEM(p) {
  678                 PROC_LOCK(p);
                      /* Skip new, in-memory and in-transition processes. */
  679                 if (p->p_state == PRS_NEW || (p->p_flag & (P_SWAPPINGOUT |
  680                     P_SWAPPINGIN | P_INMEM)) != 0) {
  681                         PROC_UNLOCK(p);
  682                         continue;
  683                 }
  684                 if (p->p_state == PRS_NORMAL && (p->p_flag & P_WKILLED) != 0) {
  685                         /*
  686                          * A swapped-out process might have mapped a
  687                          * large portion of the system's pages as
  688                          * anonymous memory.  There is no other way to
  689                          * release the memory other than to kill the
  690                          * process, for which we need to swap it in.
                               *
                               * Returned at once, with the proc lock taken at
                               * the top of the loop still held.
  691                          */
  692                         return (p);
  693                 }
  694                 if (wkilled_only) {
  695                         PROC_UNLOCK(p);
  696                         continue;
  697                 }
                      /* Whole seconds since p_swtick was last set. */
  698                 swtime = (ticks - p->p_swtick) / hz;
  699                 FOREACH_THREAD_IN_PROC(p, td) {
  700                         /*
  701                          * An otherwise runnable thread of a process
  702                          * swapped out has only the TDI_SWAPPED bit set.
  703                          */
  704                         thread_lock(td);
  705                         if (td->td_inhibitors == TDI_SWAPPED) {
                                      /*
                                       * Priority is swapped time plus sleep
                                       * time in seconds, less 8 * p_nice
                                       * unless TDF_SWAPINREQ is set on the
                                       * thread.
                                       */
  706                                 slptime = (ticks - td->td_slptick) / hz;
  707                                 pri = swtime + slptime;
  708                                 if ((td->td_flags & TDF_SWAPINREQ) == 0)
  709                                         pri -= p->p_nice * 8;
  710                                 /*
  711                                  * if this thread is higher priority
  712                                  * and there is enough space, then select
  713                                  * this process instead of the previous
  714                                  * selection.
  715                                  */
  716                                 if (pri > ppri) {
  717                                         res = p;
  718                                         ppri = pri;
  719                                 }
  720                         }
  721                         thread_unlock(td);
  722                 }
  723                 PROC_UNLOCK(p);
  724         }
  725 
              /*
               * The winner's proc lock was released in the loop, so its state
               * may have changed by now; swapper() re-checks it before acting.
               */
  726         if (res != NULL)
  727                 PROC_LOCK(res);
  728         return (res);
  729 }
  730 
  731 #define SWAPIN_INTERVAL (MAXSLP * hz / 2)
  732 
  733 /*
  734  * Limit swapper to swap in one non-WKILLED process in MAXSLP/2
  735  * interval, assuming that there is:
  736  * - no memory shortage;
  737  * - no parallel swap-ins;
  738  * - no other swap-ins in the current SWAPIN_INTERVAL.
  739  */
  740 static bool
  741 swapper_wkilled_only(void)
  742 {
  743 
  744         return (vm_page_count_min() || swap_inprogress > 0 ||
  745             (u_int)(ticks - last_swapin) < SWAPIN_INTERVAL);
  746 }
  747 
  748 void
  749 swapper(void)
  750 {
  751         struct proc *p;
  752 
  753         for (;;) {
  754                 sx_slock(&allproc_lock);
  755                 p = swapper_selector(swapper_wkilled_only());
  756                 sx_sunlock(&allproc_lock);
  757 
  758                 if (p == NULL) {
  759                         tsleep(&proc0, PVM, "swapin", SWAPIN_INTERVAL);
  760                 } else {
  761                         PROC_LOCK_ASSERT(p, MA_OWNED);
  762 
  763                         /*
  764                          * Another process may be bringing or may have
  765                          * already brought this process in while we
  766                          * traverse all threads.  Or, this process may
  767                          * have exited or even being swapped out
  768                          * again.
  769                          */
  770                         if (p->p_state == PRS_NORMAL && (p->p_flag & (P_INMEM |
  771                             P_SWAPPINGOUT | P_SWAPPINGIN)) == 0) {
  772                                 faultin(p);
  773                         }
  774                         PROC_UNLOCK(p);
  775                 }
  776         }
  777 }
  778 
  779 /*
  780  * First, if any processes have been sleeping or stopped for at least
  781  * "swap_idle_threshold1" seconds, they are swapped out.  If, however,
  782  * no such processes exist, then the longest-sleeping or stopped
  783  * process is swapped out.  Finally, and only as a last resort, if
  784  * there are no sleeping or stopped processes, the longest-resident
  785  * process is swapped out.
  786  */
  787 static void
  788 swapout_procs(int action)
  789 {
  790         struct proc *p;
  791         struct thread *td;
  792         int slptime;
  793         bool didswap, doswap;
  794 
  795         MPASS((action & (VM_SWAP_NORMAL | VM_SWAP_IDLE)) != 0);
  796 
  797         didswap = false;
  798         sx_slock(&allproc_lock);
  799         FOREACH_PROC_IN_SYSTEM(p) {
  800                 /*
  801                  * Filter out not yet fully constructed processes.  Do
  802                  * not swap out held processes.  Avoid processes which
  803                  * are system, exiting, execing, traced, already swapped
  804                  * out or are in the process of being swapped in or out.
  805                  */
  806                 PROC_LOCK(p);
  807                 if (p->p_state != PRS_NORMAL || p->p_lock != 0 || (p->p_flag &
  808                     (P_SYSTEM | P_WEXIT | P_INEXEC | P_STOPPED_SINGLE |
  809                     P_TRACED | P_SWAPPINGOUT | P_SWAPPINGIN | P_INMEM)) !=
  810                     P_INMEM) {
  811                         PROC_UNLOCK(p);
  812                         continue;
  813                 }
  814 
  815                 /*
  816                  * Further consideration of this process for swap out
  817                  * requires iterating over its threads.  We release
  818                  * allproc_lock here so that process creation and
  819                  * destruction are not blocked while we iterate.
  820                  *
  821                  * To later reacquire allproc_lock and resume
  822                  * iteration over the allproc list, we will first have
  823                  * to release the lock on the process.  We place a
  824                  * hold on the process so that it remains in the
  825                  * allproc list while it is unlocked.
  826                  */
  827                 _PHOLD_LITE(p);
  828                 sx_sunlock(&allproc_lock);
  829 
  830                 /*
  831                  * Do not swapout a realtime process.
  832                  * Guarantee swap_idle_threshold1 time in memory.
  833                  * If the system is under memory stress, or if we are
  834                  * swapping idle processes >= swap_idle_threshold2,
  835                  * then swap the process out.
  836                  */
  837                 doswap = true;
  838                 FOREACH_THREAD_IN_PROC(p, td) {
  839                         thread_lock(td);
  840                         slptime = (ticks - td->td_slptick) / hz;
  841                         if (PRI_IS_REALTIME(td->td_pri_class) ||
  842                             slptime < swap_idle_threshold1 ||
  843                             !thread_safetoswapout(td) ||
  844                             ((action & VM_SWAP_NORMAL) == 0 &&
  845                             slptime < swap_idle_threshold2))
  846                                 doswap = false;
  847                         thread_unlock(td);
  848                         if (!doswap)
  849                                 break;
  850                 }
  851                 if (doswap && swapout(p) == 0)
  852                         didswap = true;
  853 
  854                 PROC_UNLOCK(p);
  855                 if (didswap) {
  856                         sx_xlock(&allproc_lock);
  857                         swapped_cnt++;
  858                         sx_downgrade(&allproc_lock);
  859                 } else
  860                         sx_slock(&allproc_lock);
  861                 PRELE(p);
  862         }
  863         sx_sunlock(&allproc_lock);
  864 
  865         /*
  866          * If we swapped something out, and another process needed memory,
  867          * then wakeup the sched process.
  868          */
  869         if (didswap)
  870                 wakeup(&proc0);
  871 }
  872 
  873 static void
  874 swapclear(struct proc *p)
  875 {
  876         struct thread *td;
  877 
  878         PROC_LOCK_ASSERT(p, MA_OWNED);
  879 
  880         FOREACH_THREAD_IN_PROC(p, td) {
  881                 thread_lock(td);
  882                 td->td_flags |= TDF_INMEM;
  883                 td->td_flags &= ~TDF_SWAPINREQ;
  884                 TD_CLR_SWAPPED(td);
  885                 if (TD_CAN_RUN(td))
  886                         if (setrunnable(td)) {
  887 #ifdef INVARIANTS
  888                                 /*
  889                                  * XXX: We just cleared TDI_SWAPPED
  890                                  * above and set TDF_INMEM, so this
  891                                  * should never happen.
  892                                  */
  893                                 panic("not waking up swapper");
  894 #endif
  895                         }
  896                 thread_unlock(td);
  897         }
  898         p->p_flag &= ~(P_SWAPPINGIN | P_SWAPPINGOUT);
  899         p->p_flag |= P_INMEM;
  900 }
  901 
  902 static int
  903 swapout(struct proc *p)
  904 {
  905         struct thread *td;
  906 
  907         PROC_LOCK_ASSERT(p, MA_OWNED);
  908 
  909         /*
  910          * The states of this process and its threads may have changed
  911          * by now.  Assuming that there is only one pageout daemon thread,
  912          * this process should still be in memory.
  913          */
  914         KASSERT((p->p_flag & (P_INMEM | P_SWAPPINGOUT | P_SWAPPINGIN)) ==
  915             P_INMEM, ("swapout: lost a swapout race?"));
  916 
  917         /*
  918          * Remember the resident count.
  919          */
  920         p->p_vmspace->vm_swrss = vmspace_resident_count(p->p_vmspace);
  921 
  922         /*
  923          * Check and mark all threads before we proceed.
  924          */
  925         p->p_flag &= ~P_INMEM;
  926         p->p_flag |= P_SWAPPINGOUT;
  927         FOREACH_THREAD_IN_PROC(p, td) {
  928                 thread_lock(td);
  929                 if (!thread_safetoswapout(td)) {
  930                         thread_unlock(td);
  931                         swapclear(p);
  932                         return (EBUSY);
  933                 }
  934                 td->td_flags &= ~TDF_INMEM;
  935                 TD_SET_SWAPPED(td);
  936                 thread_unlock(td);
  937         }
  938         td = FIRST_THREAD_IN_PROC(p);
  939         ++td->td_ru.ru_nswap;
  940         PROC_UNLOCK(p);
  941 
  942         /*
  943          * This list is stable because all threads are now prevented from
  944          * running.  The list is only modified in the context of a running
  945          * thread in this process.
  946          */
  947         FOREACH_THREAD_IN_PROC(p, td)
  948                 vm_thread_swapout(td);
  949 
  950         PROC_LOCK(p);
  951         p->p_flag &= ~P_SWAPPINGOUT;
  952         p->p_swtick = ticks;
  953         return (0);
  954 }
Cache object: ed8e47daa44d91479c8d3f13fb7b718e
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/vm/vm_swapout.c

FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_swapout.c