FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_glue.c


/*
 * Copyright (c) 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      from: @(#)vm_glue.c     8.6 (Berkeley) 1/5/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $FreeBSD: releng/5.1/sys/vm/vm_glue.c 114983 2003-05-13 20:36:02Z jhb $
 */

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/shm.h>
#include <sys/vmmeter.h>
#include <sys/sx.h>
#include <sys/sysctl.h>

#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/unistd.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>

#include <sys/user.h>

extern int maxslp;

/*
 * System initialization
 *
 * Note: proc0 from proc.h
 */
static void vm_init_limits(void *);
SYSINIT(vm_limits, SI_SUB_VM_CONF, SI_ORDER_FIRST, vm_init_limits, &proc0)

/*
 * THIS MUST BE THE LAST INITIALIZATION ITEM!!!
 *
 * Note: run scheduling should be divorced from the vm system.
 */
static void scheduler(void *);
SYSINIT(scheduler, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, scheduler, NULL)

#ifndef NO_SWAPPING
static void swapout(struct proc *);
static void vm_proc_swapin(struct proc *p);
static void vm_proc_swapout(struct proc *p);
#endif

/*
 * MPSAFE
 *
 * WARNING!  This code calls vm_map_check_protection() which only checks
 * the associated vm_map_entry range.  It does not determine whether the
 * contents of the memory are actually readable or writable.  In most cases
 * just checking the vm_map_entry is sufficient within the kernel's address
 * space.
 */
int
kernacc(addr, len, rw)
        void *addr;
        int len, rw;
{
        boolean_t rv;
        vm_offset_t saddr, eaddr;
        vm_prot_t prot;

        KASSERT((rw & ~VM_PROT_ALL) == 0,
            ("illegal ``rw'' argument to kernacc (%x)\n", rw));
        prot = rw;
        saddr = trunc_page((vm_offset_t)addr);
        eaddr = round_page((vm_offset_t)addr + len);
        rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
        return (rv == TRUE);
}

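/*
 * Usage sketch (hypothetical helper, not part of the original file):
 * a subsystem handed a raw kernel address range, e.g. by a debugger
 * back end, can ask kernacc() whether the range is mapped with the
 * desired rights before touching it.  Assumes the usual kernel headers
 * (<sys/errno.h> for EFAULT).
 */
static int
example_probe_kernel_range(void *kaddr, int len)
{

        /* Map-entry check only; see the WARNING above kernacc(). */
        if (!kernacc(kaddr, len, VM_PROT_READ | VM_PROT_WRITE))
                return (EFAULT);
        return (0);
}
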
/*
 * MPSAFE
 *
 * WARNING!  This code calls vm_map_check_protection() which only checks
 * the associated vm_map_entry range.  It does not determine whether the
 * contents of the memory are actually readable or writable.  vmapbuf(),
 * vm_fault_quick(), or copyin()/copyout()/su*()/fu*() functions should be
 * used in conjunction with this call.
 */
int
useracc(addr, len, rw)
        void *addr;
        int len, rw;
{
        boolean_t rv;
        vm_prot_t prot;
        vm_map_t map;

        KASSERT((rw & ~VM_PROT_ALL) == 0,
            ("illegal ``rw'' argument to useracc (%x)\n", rw));
        prot = rw;
        map = &curproc->p_vmspace->vm_map;
        if ((vm_offset_t)addr + len > vm_map_max(map) ||
            (vm_offset_t)addr + len < (vm_offset_t)addr) {
                return (FALSE);
        }
        rv = vm_map_check_protection(map, trunc_page((vm_offset_t)addr),
            round_page((vm_offset_t)addr + len), prot);
        return (rv == TRUE);
}

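/*
 * Usage sketch (hypothetical helper, not part of the original file):
 * useracc() is a cheap map-level pre-check; the copyin() that follows
 * still performs the real, fault-handling copy, as the WARNING above
 * requires.
 */
static int
example_fetch_from_user(void *uaddr, void *kbuf, int len)
{

        if (!useracc(uaddr, len, VM_PROT_READ))
                return (EFAULT);
        return (copyin(uaddr, kbuf, len));
}
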
/*
 * MPSAFE
 */
void
vslock(addr, len)
        void *addr;
        u_int len;
{

        vm_map_wire(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr),
            round_page((vm_offset_t)addr + len), FALSE);
}

/*
 * MPSAFE
 */
void
vsunlock(addr, len)
        void *addr;
        u_int len;
{

        vm_map_unwire(&curproc->p_vmspace->vm_map,
            trunc_page((vm_offset_t)addr),
            round_page((vm_offset_t)addr + len), FALSE);
}

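/*
 * Usage sketch (hypothetical, not part of the original file): callers
 * such as physio-style I/O paths bracket access to a user buffer with
 * vslock()/vsunlock() so the pages stay resident and cannot fault
 * while the access is in progress.
 */
static void
example_locked_access(void *uaddr, u_int len)
{

        vslock(uaddr, len);     /* wire the pages into physical memory */
        /* ... access that must not be interrupted by a page fault ... */
        vsunlock(uaddr, len);   /* unwire when done */
}
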
/*
 * Create the U area for a new process.
 * This routine directly affects the fork perf for a process.
 */
void
vm_proc_new(struct proc *p)
{
        vm_page_t ma[UAREA_PAGES];
        vm_object_t upobj;
        vm_offset_t up;
        vm_page_t m;
        u_int i;

        /*
         * Allocate object for the upage.
         */
        upobj = vm_object_allocate(OBJT_DEFAULT, UAREA_PAGES);
        p->p_upages_obj = upobj;

        /*
         * Get a kernel virtual address for the U area for this process.
         */
        up = kmem_alloc_nofault(kernel_map, UAREA_PAGES * PAGE_SIZE);
        if (up == 0)
                panic("vm_proc_new: upage allocation failed");
        p->p_uarea = (struct user *)up;

        for (i = 0; i < UAREA_PAGES; i++) {
                /*
                 * Get a uarea page.
                 */
                m = vm_page_grab(upobj, i,
                    VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED);
                ma[i] = m;

                vm_page_lock_queues();
                vm_page_wakeup(m);
                vm_page_flag_clear(m, PG_ZERO);
                m->valid = VM_PAGE_BITS_ALL;
                vm_page_unlock_queues();
        }

        /*
         * Enter the pages into the kernel address space.
         */
        pmap_qenter(up, ma, UAREA_PAGES);
}

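/*
 * The allocation pattern above (backing object + no-fault KVA + grabbed,
 * wired pages + pmap_qenter()) is generic.  A condensed, hypothetical
 * sketch for a two-page wired kernel buffer; a real caller would also
 * clear the busy state and mark the pages valid, as vm_proc_new() does.
 */
#define EXAMPLE_PAGES   2
static vm_offset_t
example_alloc_wired_kva(vm_object_t *objp)
{
        vm_page_t ma[EXAMPLE_PAGES];
        vm_offset_t kva;
        int i;

        *objp = vm_object_allocate(OBJT_DEFAULT, EXAMPLE_PAGES);
        kva = kmem_alloc_nofault(kernel_map, EXAMPLE_PAGES * PAGE_SIZE);
        if (kva == 0)
                panic("example_alloc_wired_kva: no KVA");
        for (i = 0; i < EXAMPLE_PAGES; i++)
                ma[i] = vm_page_grab(*objp, i,
                    VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED);
        pmap_qenter(kva, ma, EXAMPLE_PAGES);    /* map pages into the KVA */
        return (kva);
}
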
/*
 * Dispose the U area for a process that has exited.
 * This routine directly impacts the exit perf of a process.
 * XXX proc_zone is marked UMA_ZONE_NOFREE, so this should never be called.
 */
void
vm_proc_dispose(struct proc *p)
{
        vm_object_t upobj;
        vm_offset_t up;
        vm_page_t m;

        upobj = p->p_upages_obj;
        VM_OBJECT_LOCK(upobj);
        if (upobj->resident_page_count != UAREA_PAGES)
                panic("vm_proc_dispose: incorrect number of pages in upobj");
        vm_page_lock_queues();
        while ((m = TAILQ_FIRST(&upobj->memq)) != NULL) {
                vm_page_busy(m);
                vm_page_unwire(m, 0);
                vm_page_free(m);
        }
        vm_page_unlock_queues();
        VM_OBJECT_UNLOCK(upobj);
        up = (vm_offset_t)p->p_uarea;
        pmap_qremove(up, UAREA_PAGES);
        kmem_free(kernel_map, up, UAREA_PAGES * PAGE_SIZE);
        vm_object_deallocate(upobj);
}

#ifndef NO_SWAPPING
/*
 * Allow the U area for a process to be prejudicially paged out.
 */
static void
vm_proc_swapout(struct proc *p)
{
        vm_object_t upobj;
        vm_offset_t up;
        vm_page_t m;

        upobj = p->p_upages_obj;
        VM_OBJECT_LOCK(upobj);
        if (upobj->resident_page_count != UAREA_PAGES)
                panic("vm_proc_swapout: incorrect number of pages in upobj");
        vm_page_lock_queues();
        TAILQ_FOREACH(m, &upobj->memq, listq) {
                vm_page_dirty(m);
                vm_page_unwire(m, 0);
        }
        vm_page_unlock_queues();
        VM_OBJECT_UNLOCK(upobj);
        up = (vm_offset_t)p->p_uarea;
        pmap_qremove(up, UAREA_PAGES);
}

/*
 * Bring the U area for a specified process back in.
 */
static void
vm_proc_swapin(struct proc *p)
{
        vm_page_t ma[UAREA_PAGES];
        vm_object_t upobj;
        vm_offset_t up;
        vm_page_t m;
        int rv;
        int i;

        upobj = p->p_upages_obj;
        for (i = 0; i < UAREA_PAGES; i++) {
                m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
                if (m->valid != VM_PAGE_BITS_ALL) {
                        rv = vm_pager_get_pages(upobj, &m, 1, 0);
                        if (rv != VM_PAGER_OK)
                                panic("vm_proc_swapin: cannot get upage");
                }
                ma[i] = m;
        }
        VM_OBJECT_LOCK(upobj);
        if (upobj->resident_page_count != UAREA_PAGES)
                panic("vm_proc_swapin: lost pages from upobj");
        vm_page_lock_queues();
        TAILQ_FOREACH(m, &upobj->memq, listq) {
                m->valid = VM_PAGE_BITS_ALL;
                vm_page_wire(m);
                vm_page_wakeup(m);
        }
        vm_page_unlock_queues();
        VM_OBJECT_UNLOCK(upobj);
        up = (vm_offset_t)p->p_uarea;
        pmap_qenter(up, ma, UAREA_PAGES);
}

/*
 * Swap in the UAREAs of all processes swapped out to the given device.
 * The pages in the UAREA are marked dirty and their swap metadata is freed.
 */
void
vm_proc_swapin_all(int devidx)
{
        struct proc *p;
        vm_object_t object;
        vm_page_t m;

retry:
        sx_slock(&allproc_lock);
        FOREACH_PROC_IN_SYSTEM(p) {
                PROC_LOCK(p);
                object = p->p_upages_obj;
                if (object != NULL) {
                        VM_OBJECT_LOCK(object);
                        if (swap_pager_isswapped(object, devidx)) {
                                VM_OBJECT_UNLOCK(object);
                                sx_sunlock(&allproc_lock);
                                faultin(p);
                                PROC_UNLOCK(p);
                                VM_OBJECT_LOCK(object);
                                vm_page_lock_queues();
                                TAILQ_FOREACH(m, &object->memq, listq)
                                        vm_page_dirty(m);
                                vm_page_unlock_queues();
                                swap_pager_freespace(object, 0,
                                    object->un_pager.swp.swp_bcount);
                                VM_OBJECT_UNLOCK(object);
                                goto retry;
                        }
                        VM_OBJECT_UNLOCK(object);
                }
                PROC_UNLOCK(p);
        }
        sx_sunlock(&allproc_lock);
}
#endif

/*
 * Implement fork's actions on an address space.
 * Here we arrange for the address space to be copied or referenced,
 * allocate a user struct (pcb and kernel stack), then call the
 * machine-dependent layer to fill those in and make the new process
 * ready to run.  The new process is set up so that it returns directly
 * to user mode to avoid stack copying and relocation problems.
 */
void
vm_forkproc(td, p2, td2, flags)
        struct thread *td;
        struct proc *p2;
        struct thread *td2;
        int flags;
{
        struct proc *p1 = td->td_proc;
        struct user *up;

        GIANT_REQUIRED;

        if ((flags & RFPROC) == 0) {
                /*
                 * Divorce the memory, if it is shared; essentially
                 * this changes shared memory amongst threads into
                 * COW locally.
                 */
                if ((flags & RFMEM) == 0) {
                        if (p1->p_vmspace->vm_refcnt > 1) {
                                vmspace_unshare(p1);
                        }
                }
                cpu_fork(td, p2, td2, flags);
                return;
        }

        if (flags & RFMEM) {
                p2->p_vmspace = p1->p_vmspace;
                p1->p_vmspace->vm_refcnt++;
        }

        while (vm_page_count_severe()) {
                VM_WAIT;
        }

        if ((flags & RFMEM) == 0) {
                p2->p_vmspace = vmspace_fork(p1->p_vmspace);

                pmap_pinit2(vmspace_pmap(p2->p_vmspace));

                if (p1->p_vmspace->vm_shm)
                        shmfork(p1, p2);
        }

        /* XXXKSE this is unsatisfactory but should be adequate */
        up = p2->p_uarea;
        MPASS(p2->p_sigacts != NULL);

        /*
         * p_stats currently points at fields in the user struct
         * but not at &u, instead at p_addr. Copy parts of
         * p_stats; zero the rest of p_stats (statistics).
         */
        p2->p_stats = &up->u_stats;
        bzero(&up->u_stats.pstat_startzero,
            (unsigned) ((caddr_t) &up->u_stats.pstat_endzero -
                (caddr_t) &up->u_stats.pstat_startzero));
        bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
            ((caddr_t) &up->u_stats.pstat_endcopy -
                (caddr_t) &up->u_stats.pstat_startcopy));

        /*
         * cpu_fork will copy and update the pcb, set up the kernel stack,
         * and make the child ready to run.
         */
        cpu_fork(td, p2, td2, flags);
}

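/*
 * Userland sketch (hypothetical, not part of the original file): the
 * RFMEM branch above is what rfork(2) exercises.  RFPROC|RFMEM creates
 * a child that shares the parent's vmspace (the vm_refcnt++ path)
 * instead of receiving a vmspace_fork() copy.  Real code normally uses
 * rfork_thread(3) to give the child its own stack; this sketch exits
 * immediately to sidestep the shared-stack hazard.
 */
#include <unistd.h>

static int
example_shared_vm_child(void)
{
        int pid;

        pid = rfork(RFPROC | RFMEM);
        if (pid == 0)
                _exit(0);       /* child: same address space as parent */
        return (pid == -1 ? -1 : 0);
}
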
/*
 * Called after process has been wait(2)'ed upon and is being reaped.
 * The idea is to reclaim resources that we could not reclaim while
 * the process was still executing.
 */
void
vm_waitproc(p)
        struct proc *p;
{

        GIANT_REQUIRED;
        cpu_wait(p);
        vmspace_exitfree(p);            /* and clean-out the vmspace */
}

/*
 * Set default limits for VM system.
 * Called for proc 0, and then inherited by all others.
 *
 * XXX should probably act directly on proc0.
 */
static void
vm_init_limits(udata)
        void *udata;
{
        struct proc *p = udata;
        int rss_limit;

        /*
         * Set up the initial limits on process VM. Set the maximum resident
         * set size to be half of (reasonably) available memory.  Since this
         * is a soft limit, it comes into effect only when the system is out
         * of memory - half of main memory helps to favor smaller processes,
         * and reduces thrashing of the object cache.
         */
        p->p_rlimit[RLIMIT_STACK].rlim_cur = dflssiz;
        p->p_rlimit[RLIMIT_STACK].rlim_max = maxssiz;
        p->p_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz;
        p->p_rlimit[RLIMIT_DATA].rlim_max = maxdsiz;
        /* limit the limit to no less than 2MB */
        rss_limit = max(cnt.v_free_count, 512);
        p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(rss_limit);
        p->p_rlimit[RLIMIT_RSS].rlim_max = RLIM_INFINITY;
}

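/*
 * Worked example (editorial, hypothetical): with 4 KB pages the floor
 * above is ptoa(512) = 512 * 4096 = 2 MB.  From userland the inherited
 * soft limit is visible via getrlimit(2):
 */
#include <sys/resource.h>
#include <stdint.h>
#include <stdio.h>

static void
example_show_rss_limit(void)
{
        struct rlimit rl;

        if (getrlimit(RLIMIT_RSS, &rl) == 0)
                printf("soft RSS limit: %ju bytes\n",
                    (uintmax_t)rl.rlim_cur);
}
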
void
faultin(p)
        struct proc *p;
{
#ifdef NO_SWAPPING

        PROC_LOCK_ASSERT(p, MA_OWNED);
        if ((p->p_sflag & PS_INMEM) == 0)
                panic("faultin: proc swapped out with NO_SWAPPING!");
#else /* !NO_SWAPPING */
        struct thread *td;

        GIANT_REQUIRED;
        PROC_LOCK_ASSERT(p, MA_OWNED);
        /*
         * If another process is swapping in this process,
         * just wait until it finishes.
         */
        if (p->p_sflag & PS_SWAPPINGIN)
                msleep(&p->p_sflag, &p->p_mtx, PVM, "faultin", 0);
        else if ((p->p_sflag & PS_INMEM) == 0) {
                /*
                 * Don't let another thread swap process p out while we are
                 * busy swapping it in.
                 */
                ++p->p_lock;
                mtx_lock_spin(&sched_lock);
                p->p_sflag |= PS_SWAPPINGIN;
                mtx_unlock_spin(&sched_lock);
                PROC_UNLOCK(p);

                vm_proc_swapin(p);
                FOREACH_THREAD_IN_PROC(p, td)
                        pmap_swapin_thread(td);

                PROC_LOCK(p);
                mtx_lock_spin(&sched_lock);
                p->p_sflag &= ~PS_SWAPPINGIN;
                p->p_sflag |= PS_INMEM;
                FOREACH_THREAD_IN_PROC(p, td) {
                        TD_CLR_SWAPPED(td);
                        if (TD_CAN_RUN(td))
                                setrunnable(td);
                }
                mtx_unlock_spin(&sched_lock);

                wakeup(&p->p_sflag);

                /* Allow other threads to swap p out now. */
                --p->p_lock;
        }
#endif /* NO_SWAPPING */
}

/*
 * This swapin algorithm attempts to swap-in processes only if there
 * is enough space for them.  Of course, if a process waits for a long
 * time, it will be swapped in anyway.
 *
 *  XXXKSE - process with the thread with highest priority counts..
 *
 * Giant is still held at this point, to be released in tsleep.
 */
/* ARGSUSED*/
static void
scheduler(dummy)
        void *dummy;
{
        struct proc *p;
        struct thread *td;
        int pri;
        struct proc *pp;
        int ppri;

        mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED);
        /* GIANT_REQUIRED */

loop:
        if (vm_page_count_min()) {
                VM_WAIT;
                goto loop;
        }

        pp = NULL;
        ppri = INT_MIN;
        sx_slock(&allproc_lock);
        FOREACH_PROC_IN_SYSTEM(p) {
                struct ksegrp *kg;
                if (p->p_sflag & (PS_INMEM | PS_SWAPPINGOUT | PS_SWAPPINGIN)) {
                        continue;
                }
                mtx_lock_spin(&sched_lock);
                FOREACH_THREAD_IN_PROC(p, td) {
                        /*
                         * An otherwise runnable thread of a process
                         * swapped out has only the TDI_SWAPPED bit set.
                         */
                        if (td->td_inhibitors == TDI_SWAPPED) {
                                kg = td->td_ksegrp;
                                pri = p->p_swtime + kg->kg_slptime;
                                if ((p->p_sflag & PS_SWAPINREQ) == 0) {
                                        pri -= kg->kg_nice * 8;
                                }

                                /*
                                 * if this ksegrp is higher priority
                                 * and there is enough space, then select
                                 * this process instead of the previous
                                 * selection.
                                 */
                                if (pri > ppri) {
                                        pp = p;
                                        ppri = pri;
                                }
                        }
                }
                mtx_unlock_spin(&sched_lock);
        }
        sx_sunlock(&allproc_lock);

        /*
         * Nothing to do, back to sleep.
         */
        if ((p = pp) == NULL) {
                tsleep(&proc0, PVM, "sched", maxslp * hz / 2);
                goto loop;
        }
        PROC_LOCK(p);

        /*
         * Another process may be bringing or may have already
         * brought this process in while we traverse all threads.
         * Or, this process may even be being swapped out again.
         */
        if (p->p_sflag & (PS_INMEM | PS_SWAPPINGOUT | PS_SWAPPINGIN)) {
                PROC_UNLOCK(p);
                goto loop;
        }

        mtx_lock_spin(&sched_lock);
        p->p_sflag &= ~PS_SWAPINREQ;
        mtx_unlock_spin(&sched_lock);

        /*
         * We would like to bring someone in. (only if there is space).
         * [What checks the space? ]
         */
        faultin(p);
        PROC_UNLOCK(p);
        mtx_lock_spin(&sched_lock);
        p->p_swtime = 0;
        mtx_unlock_spin(&sched_lock);
        goto loop;
}

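/*
 * Editorial sketch (hypothetical) of the ranking rule used above:
 * swap-in priority is time spent swapped out plus the ksegrp's sleep
 * time, with the nice bonus applied only when no explicit swap-in
 * request (PS_SWAPINREQ) is pending.
 */
static int
example_swapin_pri(int swtime, int slptime, int nice, int swapinreq)
{
        int pri;

        pri = swtime + slptime;
        if (!swapinreq)
                pri -= nice * 8;        /* higher nice values rank lower */
        return (pri);
}
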
#ifndef NO_SWAPPING

/*
 * Swap_idle_threshold1 is the guaranteed swapped in time for a process
 */
static int swap_idle_threshold1 = 2;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold1, CTLFLAG_RW,
    &swap_idle_threshold1, 0, "Guaranteed swapped in time for a process");

/*
 * Swap_idle_threshold2 is the time that a process can be idle before
 * it will be swapped out, if idle swapping is enabled.
 */
static int swap_idle_threshold2 = 10;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2, CTLFLAG_RW,
    &swap_idle_threshold2, 0, "Time before a process will be swapped out");

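/*
 * Both knobs are CTLFLAG_RW, so they can be tuned at runtime, e.g.
 * "sysctl vm.swap_idle_threshold2=30" from the shell or, from C
 * (hypothetical userland sketch, not part of the original file):
 */
#include <sys/types.h>
#include <sys/sysctl.h>

static int
example_set_idle_threshold(int secs)
{

        return (sysctlbyname("vm.swap_idle_threshold2", NULL, NULL,
            &secs, sizeof(secs)));
}
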
/*
 * Swapout is driven by the pageout daemon.  Very simple, we find eligible
 * procs and unwire their u-areas.  We try to always "swap" at least one
 * process in case we need the room for a swapin.
 * If any procs have been sleeping/stopped for at least maxslp seconds,
 * they are swapped.  Else, we swap the longest-sleeping or stopped process,
 * if any, otherwise the longest-resident process.
 */
void
swapout_procs(action)
int action;
{
        struct proc *p;
        struct thread *td;
        struct ksegrp *kg;
        struct proc *outp, *outp2;
        int outpri, outpri2;
        int didswap = 0;

        GIANT_REQUIRED;

        outp = outp2 = NULL;
        outpri = outpri2 = INT_MIN;
retry:
        sx_slock(&allproc_lock);
        FOREACH_PROC_IN_SYSTEM(p) {
                struct vmspace *vm;
                int minslptime = 100000;

                /*
                 * Watch out for a process in
                 * creation.  It may have no
                 * address space or lock yet.
                 */
                mtx_lock_spin(&sched_lock);
                if (p->p_state == PRS_NEW) {
                        mtx_unlock_spin(&sched_lock);
                        continue;
                }
                mtx_unlock_spin(&sched_lock);

                /*
                 * An aio daemon switches its
                 * address space while running.
                 * Perform a quick check whether
                 * a process has P_SYSTEM.
                 */
                if ((p->p_flag & P_SYSTEM) != 0)
                        continue;

                /*
                 * Do not swapout a process that
                 * is waiting for VM data
                 * structures as there is a possible
                 * deadlock.  Test this first as
                 * this may block.
                 *
                 * Lock the map until swapout
                 * finishes, or a thread of this
                 * process may attempt to alter
                 * the map.
                 */
                PROC_LOCK(p);
                vm = p->p_vmspace;
                KASSERT(vm != NULL,
                        ("swapout_procs: a process has no address space"));
                ++vm->vm_refcnt;
                PROC_UNLOCK(p);
                if (!vm_map_trylock(&vm->vm_map))
                        goto nextproc1;

                PROC_LOCK(p);
                if (p->p_lock != 0 ||
                    (p->p_flag & (P_STOPPED_SINGLE|P_TRACED|P_SYSTEM|P_WEXIT)
                    ) != 0) {
                        goto nextproc2;
                }
                /*
                 * only aiod changes vmspace, however it will be
                 * skipped because of the if statement above checking
                 * for P_SYSTEM
                 */
                if ((p->p_sflag & (PS_INMEM|PS_SWAPPINGOUT|PS_SWAPPINGIN)) != PS_INMEM)
                        goto nextproc2;

                switch (p->p_state) {
                default:
                        /* Don't swap out processes in any sort
                         * of 'special' state. */
                        break;

                case PRS_NORMAL:
                        mtx_lock_spin(&sched_lock);
                        /*
                         * do not swapout a realtime process
                         * Check all the thread groups..
                         */
                        FOREACH_KSEGRP_IN_PROC(p, kg) {
                                if (PRI_IS_REALTIME(kg->kg_pri_class))
                                        goto nextproc;

                                /*
                                 * Guarantee swap_idle_threshold1
                                 * time in memory.
                                 */
                                if (kg->kg_slptime < swap_idle_threshold1)
                                        goto nextproc;

                                /*
                                 * Do not swapout a process if it is
                                 * waiting on a critical event of some
                                 * kind or there is a thread whose
                                 * pageable memory may be accessed.
                                 *
                                 * This could be refined to support
                                 * swapping out a thread.
                                 */
                                FOREACH_THREAD_IN_GROUP(kg, td) {
                                        if ((td->td_priority) < PSOCK ||
                                            !thread_safetoswapout(td))
                                                goto nextproc;
                                }
                                /*
                                 * If the system is under memory stress,
                                 * or if we are swapping
                                 * idle processes >= swap_idle_threshold2,
                                 * then swap the process out.
                                 */
                                if (((action & VM_SWAP_NORMAL) == 0) &&
                                    (((action & VM_SWAP_IDLE) == 0) ||
                                    (kg->kg_slptime < swap_idle_threshold2)))
                                        goto nextproc;

                                if (minslptime > kg->kg_slptime)
                                        minslptime = kg->kg_slptime;
                        }

                        /*
                         * If the process has been asleep for a while and had
                         * most of its pages taken away already, swap it out.
                         */
                        if ((action & VM_SWAP_NORMAL) ||
                                ((action & VM_SWAP_IDLE) &&
                                 (minslptime > swap_idle_threshold2))) {
                                swapout(p);
                                didswap++;
                                mtx_unlock_spin(&sched_lock);
                                PROC_UNLOCK(p);
                                vm_map_unlock(&vm->vm_map);
                                vmspace_free(vm);
                                sx_sunlock(&allproc_lock);
                                goto retry;
                        }
nextproc:
                        mtx_unlock_spin(&sched_lock);
                }
nextproc2:
                PROC_UNLOCK(p);
                vm_map_unlock(&vm->vm_map);
nextproc1:
                vmspace_free(vm);
                continue;
        }
        sx_sunlock(&allproc_lock);
        /*
         * If we swapped something out, and another process needed memory,
         * then wakeup the sched process.
         */
        if (didswap)
                wakeup(&proc0);
}

static void
swapout(p)
        struct proc *p;
{
        struct thread *td;

        PROC_LOCK_ASSERT(p, MA_OWNED);
        mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
#if defined(SWAP_DEBUG)
        printf("swapping out %d\n", p->p_pid);
#endif

        /*
         * The states of this process and its threads may have changed
         * by now.  Assuming that there is only one pageout daemon thread,
         * this process should still be in memory.
         */
        KASSERT((p->p_sflag & (PS_INMEM|PS_SWAPPINGOUT|PS_SWAPPINGIN)) == PS_INMEM,
                ("swapout: lost a swapout race?"));

#if defined(INVARIANTS)
        /*
         * Make sure that all threads are safe to be swapped out.
         *
         * Alternatively, we could swap out only safe threads.
         */
        FOREACH_THREAD_IN_PROC(p, td) {
                KASSERT(thread_safetoswapout(td),
                        ("swapout: there is a thread not safe for swapout"));
        }
#endif /* INVARIANTS */

        ++p->p_stats->p_ru.ru_nswap;
        /*
         * remember the process resident count
         */
        p->p_vmspace->vm_swrss = vmspace_resident_count(p->p_vmspace);

        p->p_sflag &= ~PS_INMEM;
        p->p_sflag |= PS_SWAPPINGOUT;
        PROC_UNLOCK(p);
        FOREACH_THREAD_IN_PROC(p, td)
                TD_SET_SWAPPED(td);
        mtx_unlock_spin(&sched_lock);

        vm_proc_swapout(p);
        FOREACH_THREAD_IN_PROC(p, td)
                pmap_swapout_thread(td);

        PROC_LOCK(p);
        mtx_lock_spin(&sched_lock);
        p->p_sflag &= ~PS_SWAPPINGOUT;
        p->p_swtime = 0;
}
#endif /* !NO_SWAPPING */


This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.