FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_glue.c


/*
 * Copyright (c) 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      from: @(#)vm_glue.c     8.6 (Berkeley) 1/5/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/5.2/sys/vm/vm_glue.c 122384 2003-11-10 01:37:40Z alc $");

#include "opt_vm.h"
#include "opt_kstack_pages.h"
#include "opt_kstack_max_pages.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/shm.h>
#include <sys/vmmeter.h>
#include <sys/sx.h>
#include <sys/sysctl.h>

#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/unistd.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>

#include <sys/user.h>

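/*
 * maxslp is the maximum time, in seconds, that a process may sleep
 * before it becomes a candidate for swapout; see swapout_procs().
 */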
extern int maxslp;

/*
 * System initialization
 *
 * Note: proc0 from proc.h
 */
static void vm_init_limits(void *);
SYSINIT(vm_limits, SI_SUB_VM_CONF, SI_ORDER_FIRST, vm_init_limits, &proc0)

/*
 * THIS MUST BE THE LAST INITIALIZATION ITEM!!!
 *
 * Note: run scheduling should be divorced from the vm system.
 */
static void scheduler(void *);
SYSINIT(scheduler, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, scheduler, NULL)

#ifndef NO_SWAPPING
static void swapout(struct proc *);
static void vm_proc_swapin(struct proc *p);
static void vm_proc_swapout(struct proc *p);
#endif

/*
 * MPSAFE
 *
 * WARNING!  This code calls vm_map_check_protection() which only checks
 * the associated vm_map_entry range.  It does not determine whether the
 * contents of the memory are actually readable or writable.  In most cases
 * just checking the vm_map_entry is sufficient within the kernel's address
 * space.
 */
int
kernacc(void *addr, int len, int rw)
{
        boolean_t rv;
        vm_offset_t saddr, eaddr;
        vm_prot_t prot;

        KASSERT((rw & ~VM_PROT_ALL) == 0,
            ("illegal ``rw'' argument to kernacc (%x)\n", rw));
        prot = rw;
        saddr = trunc_page((vm_offset_t)addr);
        eaddr = round_page((vm_offset_t)addr + len);
        vm_map_lock_read(kernel_map);
        rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
        vm_map_unlock_read(kernel_map);
        return (rv == TRUE);
}

/*
 * MPSAFE
 *
 * WARNING!  This code calls vm_map_check_protection() which only checks
 * the associated vm_map_entry range.  It does not determine whether the
 * contents of the memory are actually readable or writable.  vmapbuf(),
 * vm_fault_quick(), or copyin()/copyout()/su*()/fu*() functions should be
 * used in conjunction with this call.
 */
int
useracc(void *addr, int len, int rw)
{
        boolean_t rv;
        vm_prot_t prot;
        vm_map_t map;

        KASSERT((rw & ~VM_PROT_ALL) == 0,
            ("illegal ``rw'' argument to useracc (%x)\n", rw));
        prot = rw;
        map = &curproc->p_vmspace->vm_map;
        if ((vm_offset_t)addr + len > vm_map_max(map) ||
            (vm_offset_t)addr + len < (vm_offset_t)addr) {
                return (FALSE);
        }
        vm_map_lock_read(map);
        rv = vm_map_check_protection(map, trunc_page((vm_offset_t)addr),
            round_page((vm_offset_t)addr + len), prot);
        vm_map_unlock_read(map);
        return (rv == TRUE);
}

/*
 * MPSAFE
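 *
 * vslock() wires the range [addr, addr + len) of the current process's
 * address space into memory so that it cannot be paged out while the
 * kernel accesses it.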
 */
void
vslock(void *addr, u_int len)
{

        vm_map_wire(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr),
            round_page((vm_offset_t)addr + len),
            VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
}

/*
 * MPSAFE
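 *
 * vsunlock() unwires a range previously wired by vslock(), making it
 * pageable again.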
 */
void
vsunlock(void *addr, u_int len)
{

        vm_map_unwire(&curproc->p_vmspace->vm_map,
            trunc_page((vm_offset_t)addr),
            round_page((vm_offset_t)addr + len),
            VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
}

/*
 * Create the U area for a new process.
 * This routine directly affects the fork performance of a process.
 */
void
vm_proc_new(struct proc *p)
{
        vm_page_t ma[UAREA_PAGES];
        vm_object_t upobj;
        vm_offset_t up;
        vm_page_t m;
        u_int i;

        /*
         * Get a kernel virtual address for the U area for this process.
         */
        up = kmem_alloc_nofault(kernel_map, UAREA_PAGES * PAGE_SIZE);
        if (up == 0)
                panic("vm_proc_new: upage allocation failed");
        p->p_uarea = (struct user *)up;

        /*
         * Allocate object and page(s) for the U area.
         */
        upobj = vm_object_allocate(OBJT_DEFAULT, UAREA_PAGES);
        p->p_upages_obj = upobj;
        VM_OBJECT_LOCK(upobj);
        for (i = 0; i < UAREA_PAGES; i++) {
                m = vm_page_grab(upobj, i,
                    VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED);
                ma[i] = m;

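                /*
                 * vm_page_grab() returns the page busy; mark it fully
                 * valid and wake up any threads waiting on it before
                 * releasing the busy state.
                 */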
                vm_page_lock_queues();
                vm_page_wakeup(m);
                m->valid = VM_PAGE_BITS_ALL;
                vm_page_unlock_queues();
        }
        VM_OBJECT_UNLOCK(upobj);

        /*
         * Enter the pages into the kernel address space.
         */
        pmap_qenter(up, ma, UAREA_PAGES);
}

/*
 * Dispose of the U area for a process that has exited.
 * This routine directly impacts the exit performance of a process.
 * XXX proc_zone is marked UMA_ZONE_NOFREE, so this should never be called.
 */
void
vm_proc_dispose(struct proc *p)
{
        vm_object_t upobj;
        vm_offset_t up;
        vm_page_t m;

        upobj = p->p_upages_obj;
        VM_OBJECT_LOCK(upobj);
        if (upobj->resident_page_count != UAREA_PAGES)
                panic("vm_proc_dispose: incorrect number of pages in upobj");
        vm_page_lock_queues();
        while ((m = TAILQ_FIRST(&upobj->memq)) != NULL) {
                vm_page_busy(m);
                vm_page_unwire(m, 0);
                vm_page_free(m);
        }
        vm_page_unlock_queues();
        VM_OBJECT_UNLOCK(upobj);
        up = (vm_offset_t)p->p_uarea;
        pmap_qremove(up, UAREA_PAGES);
        kmem_free(kernel_map, up, UAREA_PAGES * PAGE_SIZE);
        vm_object_deallocate(upobj);
}

#ifndef NO_SWAPPING
/*
 * Allow the U area for a process to be prejudicially paged out.
 */
static void
vm_proc_swapout(struct proc *p)
{
        vm_object_t upobj;
        vm_offset_t up;
        vm_page_t m;

        upobj = p->p_upages_obj;
        VM_OBJECT_LOCK(upobj);
        if (upobj->resident_page_count != UAREA_PAGES)
                panic("vm_proc_swapout: incorrect number of pages in upobj");
        vm_page_lock_queues();
        TAILQ_FOREACH(m, &upobj->memq, listq) {
                vm_page_dirty(m);
                vm_page_unwire(m, 0);
        }
        vm_page_unlock_queues();
        VM_OBJECT_UNLOCK(upobj);
        up = (vm_offset_t)p->p_uarea;
        pmap_qremove(up, UAREA_PAGES);
}

/*
 * Bring the U area for a specified process back in.
 */
static void
vm_proc_swapin(struct proc *p)
{
        vm_page_t ma[UAREA_PAGES];
        vm_object_t upobj;
        vm_offset_t up;
        vm_page_t m;
        int rv;
        int i;

        upobj = p->p_upages_obj;
        VM_OBJECT_LOCK(upobj);
        for (i = 0; i < UAREA_PAGES; i++) {
                m = vm_page_grab(upobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
                if (m->valid != VM_PAGE_BITS_ALL) {
                        rv = vm_pager_get_pages(upobj, &m, 1, 0);
                        if (rv != VM_PAGER_OK)
                                panic("vm_proc_swapin: cannot get upage");
                }
                ma[i] = m;
        }
        if (upobj->resident_page_count != UAREA_PAGES)
                panic("vm_proc_swapin: lost pages from upobj");
        vm_page_lock_queues();
        TAILQ_FOREACH(m, &upobj->memq, listq) {
                m->valid = VM_PAGE_BITS_ALL;
                vm_page_wire(m);
                vm_page_wakeup(m);
        }
        vm_page_unlock_queues();
        VM_OBJECT_UNLOCK(upobj);
        up = (vm_offset_t)p->p_uarea;
        pmap_qenter(up, ma, UAREA_PAGES);
}

/*
 * Swap in the UAREAs of all processes swapped out to the given device.
 * The pages in the UAREA are marked dirty and their swap metadata is freed.
 */
void
vm_proc_swapin_all(struct swdevt *devidx)
{
        struct proc *p;
        vm_object_t object;
        vm_page_t m;

retry:
        sx_slock(&allproc_lock);
        FOREACH_PROC_IN_SYSTEM(p) {
                PROC_LOCK(p);
                object = p->p_upages_obj;
                if (object != NULL) {
                        VM_OBJECT_LOCK(object);
                        if (swap_pager_isswapped(object, devidx)) {
                                VM_OBJECT_UNLOCK(object);
                                sx_sunlock(&allproc_lock);
                                faultin(p);
                                PROC_UNLOCK(p);
                                VM_OBJECT_LOCK(object);
                                vm_page_lock_queues();
                                TAILQ_FOREACH(m, &object->memq, listq)
                                        vm_page_dirty(m);
                                vm_page_unlock_queues();
                                swap_pager_freespace(object, 0,
                                    object->un_pager.swp.swp_bcount);
                                VM_OBJECT_UNLOCK(object);
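                                /*
                                 * The allproc lock was dropped around
                                 * faultin(), so the process list may
                                 * have changed; rescan it from the top.
                                 */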
                                goto retry;
                        }
                        VM_OBJECT_UNLOCK(object);
                }
                PROC_UNLOCK(p);
        }
        sx_sunlock(&allproc_lock);
}
#endif

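/*
 * Upper bound on the size of a kernel stack, in pages.  The default may
 * be overridden with the KSTACK_MAX_PAGES kernel option.
 */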
#ifndef KSTACK_MAX_PAGES
#define KSTACK_MAX_PAGES 32
#endif

/*
 * Create the kernel stack (including pcb for i386) for a new thread.
 * This routine directly affects the fork performance of a process and
 * the creation performance of a thread.
 */
void
vm_thread_new(struct thread *td, int pages)
{
        vm_object_t ksobj;
        vm_offset_t ks;
        vm_page_t m, ma[KSTACK_MAX_PAGES];
        int i;

        /* Bounds check */
        if (pages <= 1)
                pages = KSTACK_PAGES;
        else if (pages > KSTACK_MAX_PAGES)
                pages = KSTACK_MAX_PAGES;
        /*
         * Allocate an object for the kstack.
         */
        ksobj = vm_object_allocate(OBJT_DEFAULT, pages);
        td->td_kstack_obj = ksobj;
        /*
         * Get a kernel virtual address for this thread's kstack.
         */
        ks = kmem_alloc_nofault(kernel_map,
           (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE);
        if (ks == 0)
                panic("vm_thread_new: kstack allocation failed");
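        /*
         * Leave the guard pages at the base of the stack unmapped so
         * that a kernel stack overflow faults immediately instead of
         * silently corrupting adjacent memory.
         */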
        if (KSTACK_GUARD_PAGES != 0) {
                pmap_qremove(ks, KSTACK_GUARD_PAGES);
                ks += KSTACK_GUARD_PAGES * PAGE_SIZE;
        }
        td->td_kstack = ks;
        /*
         * Knowing the number of pages allocated is useful when you
         * want to deallocate them.
         */
        td->td_kstack_pages = pages;
        /*
         * For the length of the stack, link in a real page of ram for each
         * page of stack.
         */
        VM_OBJECT_LOCK(ksobj);
        for (i = 0; i < pages; i++) {
                /*
                 * Get a kernel stack page.
                 */
                m = vm_page_grab(ksobj, i,
                    VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED);
                ma[i] = m;
                vm_page_lock_queues();
                vm_page_wakeup(m);
                m->valid = VM_PAGE_BITS_ALL;
                vm_page_unlock_queues();
        }
        VM_OBJECT_UNLOCK(ksobj);
        pmap_qenter(ks, ma, pages);
}

/*
 * Dispose of a thread's kernel stack.
 */
void
vm_thread_dispose(struct thread *td)
{
        vm_object_t ksobj;
        vm_offset_t ks;
        vm_page_t m;
        int i, pages;

        pages = td->td_kstack_pages;
        ksobj = td->td_kstack_obj;
        ks = td->td_kstack;
        pmap_qremove(ks, pages);
        VM_OBJECT_LOCK(ksobj);
        for (i = 0; i < pages; i++) {
                m = vm_page_lookup(ksobj, i);
                if (m == NULL)
                        panic("vm_thread_dispose: kstack already missing?");
                vm_page_lock_queues();
                vm_page_busy(m);
                vm_page_unwire(m, 0);
                vm_page_free(m);
                vm_page_unlock_queues();
        }
        VM_OBJECT_UNLOCK(ksobj);
        vm_object_deallocate(ksobj);
        kmem_free(kernel_map, ks - (KSTACK_GUARD_PAGES * PAGE_SIZE),
            (pages + KSTACK_GUARD_PAGES) * PAGE_SIZE);
}

/*
 * Allow a thread's kernel stack to be paged out.
 */
void
vm_thread_swapout(struct thread *td)
{
        vm_object_t ksobj;
        vm_page_t m;
        int i, pages;

        cpu_thread_swapout(td);
        pages = td->td_kstack_pages;
        ksobj = td->td_kstack_obj;
        pmap_qremove(td->td_kstack, pages);
        VM_OBJECT_LOCK(ksobj);
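        /*
         * Dirty each page so that its contents reach swap before the
         * pageout daemon reclaims it, then drop the wiring that kept
         * the stack resident.
         */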
        for (i = 0; i < pages; i++) {
                m = vm_page_lookup(ksobj, i);
                if (m == NULL)
                        panic("vm_thread_swapout: kstack already missing?");
                vm_page_lock_queues();
                vm_page_dirty(m);
                vm_page_unwire(m, 0);
                vm_page_unlock_queues();
        }
        VM_OBJECT_UNLOCK(ksobj);
}

/*
 * Bring the kernel stack for a specified thread back in.
 */
void
vm_thread_swapin(struct thread *td)
{
        vm_object_t ksobj;
        vm_page_t m, ma[KSTACK_MAX_PAGES];
        int i, pages, rv;

        pages = td->td_kstack_pages;
        ksobj = td->td_kstack_obj;
        VM_OBJECT_LOCK(ksobj);
        for (i = 0; i < pages; i++) {
                m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
                if (m->valid != VM_PAGE_BITS_ALL) {
                        rv = vm_pager_get_pages(ksobj, &m, 1, 0);
                        if (rv != VM_PAGER_OK)
                                panic("vm_thread_swapin: cannot get kstack for proc: %d", td->td_proc->p_pid);
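                        /*
                         * The pager may have replaced the page, so look
                         * it up again before marking it valid.
                         */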
                        m = vm_page_lookup(ksobj, i);
                        m->valid = VM_PAGE_BITS_ALL;
                }
                ma[i] = m;
                vm_page_lock_queues();
                vm_page_wire(m);
                vm_page_wakeup(m);
                vm_page_unlock_queues();
        }
        VM_OBJECT_UNLOCK(ksobj);
        pmap_qenter(td->td_kstack, ma, pages);
        cpu_thread_swapin(td);
}

/*
 * Set up a variable-sized alternate kstack.
 */
void
vm_thread_new_altkstack(struct thread *td, int pages)
{

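        /*
         * Stash the current stack so that it can be restored by
         * vm_thread_dispose_altkstack(), then allocate a fresh one.
         */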
        td->td_altkstack = td->td_kstack;
        td->td_altkstack_obj = td->td_kstack_obj;
        td->td_altkstack_pages = td->td_kstack_pages;

        vm_thread_new(td, pages);
}

/*
 * Restore the original kstack.
 */
void
vm_thread_dispose_altkstack(struct thread *td)
{

        vm_thread_dispose(td);

        td->td_kstack = td->td_altkstack;
        td->td_kstack_obj = td->td_altkstack_obj;
        td->td_kstack_pages = td->td_altkstack_pages;
        td->td_altkstack = 0;
        td->td_altkstack_obj = NULL;
        td->td_altkstack_pages = 0;
}

/*
 * Implement fork's actions on an address space.
 * Here we arrange for the address space to be copied or referenced,
 * allocate a user struct (pcb and kernel stack), then call the
 * machine-dependent layer to fill those in and make the new process
 * ready to run.  The new process is set up so that it returns directly
 * to user mode to avoid stack copying and relocation problems.
 */
void
vm_forkproc(struct thread *td, struct proc *p2, struct thread *td2, int flags)
{
        struct proc *p1 = td->td_proc;
        struct user *up;

        GIANT_REQUIRED;

        if ((flags & RFPROC) == 0) {
                /*
                 * Divorce the memory, if it is shared: what was shared
                 * memory amongst the threads becomes copy-on-write
                 * locally.
                 */
                if ((flags & RFMEM) == 0) {
                        if (p1->p_vmspace->vm_refcnt > 1) {
                                vmspace_unshare(p1);
                        }
                }
                cpu_fork(td, p2, td2, flags);
                return;
        }

        if (flags & RFMEM) {
                p2->p_vmspace = p1->p_vmspace;
                p1->p_vmspace->vm_refcnt++;
        }

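        /*
         * If free memory is critically low, wait for the pageout
         * daemon to recover some pages before committing to the fork.
         */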
        while (vm_page_count_severe()) {
                VM_WAIT;
        }

        if ((flags & RFMEM) == 0) {
                p2->p_vmspace = vmspace_fork(p1->p_vmspace);

                pmap_pinit2(vmspace_pmap(p2->p_vmspace));

                if (p1->p_vmspace->vm_shm)
                        shmfork(p1, p2);
        }

        /* XXXKSE this is unsatisfactory but should be adequate */
        up = p2->p_uarea;
        MPASS(p2->p_sigacts != NULL);

        /*
         * p_stats currently points at fields in the user struct, reached
         * through p_addr rather than &u.  Copy parts of p_stats and zero
         * out the rest (the statistics).
         */
        p2->p_stats = &up->u_stats;
        bzero(&up->u_stats.pstat_startzero,
            (unsigned) ((caddr_t) &up->u_stats.pstat_endzero -
                (caddr_t) &up->u_stats.pstat_startzero));
        bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
            ((caddr_t) &up->u_stats.pstat_endcopy -
                (caddr_t) &up->u_stats.pstat_startcopy));

        /*
         * cpu_fork will copy and update the pcb, set up the kernel stack,
         * and make the child ready to run.
         */
        cpu_fork(td, p2, td2, flags);
}

/*
 * Called after a process has been wait(2)'ed upon and is being reaped.
 * The idea is to reclaim resources that we could not reclaim while
 * the process was still executing.
 */
void
vm_waitproc(struct proc *p)
{

        GIANT_REQUIRED;
        vmspace_exitfree(p);            /* and clean-out the vmspace */
}

/*
 * Set default limits for VM system.
 * Called for proc 0, and then inherited by all others.
 *
 * XXX should probably act directly on proc0.
 */
static void
vm_init_limits(void *udata)
{
        struct proc *p = udata;
        int rss_limit;

        /*
         * Set up the initial limits on process VM. Set the maximum resident
         * set size to be half of (reasonably) available memory.  Since this
         * is a soft limit, it comes into effect only when the system is out
         * of memory - half of main memory helps to favor smaller processes,
         * and reduces thrashing of the object cache.
         */
        p->p_rlimit[RLIMIT_STACK].rlim_cur = dflssiz;
        p->p_rlimit[RLIMIT_STACK].rlim_max = maxssiz;
        p->p_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz;
        p->p_rlimit[RLIMIT_DATA].rlim_max = maxdsiz;
        /* clamp the RSS limit to no less than 512 pages (2MB with 4KB pages) */
        rss_limit = max(cnt.v_free_count, 512);
        p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(rss_limit);
        p->p_rlimit[RLIMIT_RSS].rlim_max = RLIM_INFINITY;
}

void
faultin(struct proc *p)
{
#ifdef NO_SWAPPING

        PROC_LOCK_ASSERT(p, MA_OWNED);
        if ((p->p_sflag & PS_INMEM) == 0)
                panic("faultin: proc swapped out with NO_SWAPPING!");
#else /* !NO_SWAPPING */
        struct thread *td;

        GIANT_REQUIRED;
        PROC_LOCK_ASSERT(p, MA_OWNED);
        /*
         * If another process is swapping in this process,
         * just wait until it finishes.
         */
        if (p->p_sflag & PS_SWAPPINGIN)
                msleep(&p->p_sflag, &p->p_mtx, PVM, "faultin", 0);
        else if ((p->p_sflag & PS_INMEM) == 0) {
                /*
                 * Don't let another thread swap process p out while we are
                 * busy swapping it in.
                 */
                ++p->p_lock;
                mtx_lock_spin(&sched_lock);
                p->p_sflag |= PS_SWAPPINGIN;
                mtx_unlock_spin(&sched_lock);
                PROC_UNLOCK(p);

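                /*
                 * Page the U area and the kernel stacks back in.  This
                 * may sleep for pager I/O, which is why the proc lock
                 * was dropped above; the p_lock hold count keeps the
                 * process from being swapped out again meanwhile.
                 */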
                vm_proc_swapin(p);
                FOREACH_THREAD_IN_PROC(p, td)
                        vm_thread_swapin(td);

                PROC_LOCK(p);
                mtx_lock_spin(&sched_lock);
                p->p_sflag &= ~PS_SWAPPINGIN;
                p->p_sflag |= PS_INMEM;
                FOREACH_THREAD_IN_PROC(p, td) {
                        TD_CLR_SWAPPED(td);
                        if (TD_CAN_RUN(td))
                                setrunnable(td);
                }
                mtx_unlock_spin(&sched_lock);

                wakeup(&p->p_sflag);

                /* Allow other threads to swap p out now. */
                --p->p_lock;
        }
#endif /* NO_SWAPPING */
}

/*
 * This swapin algorithm attempts to swap-in processes only if there
 * is enough space for them.  Of course, if a process waits for a long
 * time, it will be swapped in anyway.
 *
 * XXXKSE - the process with the highest-priority thread counts.
 *
 * Giant is still held at this point, to be released in tsleep.
 */
/* ARGSUSED */
static void
scheduler(void *dummy)
{
        struct proc *p;
        struct thread *td;
        int pri;
        struct proc *pp;
        int ppri;

        mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED);
        /* GIANT_REQUIRED */

loop:
        if (vm_page_count_min()) {
                VM_WAIT;
                goto loop;
        }

        pp = NULL;
        ppri = INT_MIN;
        sx_slock(&allproc_lock);
        FOREACH_PROC_IN_SYSTEM(p) {
                struct ksegrp *kg;
                if (p->p_sflag & (PS_INMEM | PS_SWAPPINGOUT | PS_SWAPPINGIN)) {
                        continue;
                }
                mtx_lock_spin(&sched_lock);
                FOREACH_THREAD_IN_PROC(p, td) {
                        /*
                         * An otherwise runnable thread of a process
                         * swapped out has only the TDI_SWAPPED bit set.
                         */
                        if (td->td_inhibitors == TDI_SWAPPED) {
                                kg = td->td_ksegrp;
                                pri = p->p_swtime + kg->kg_slptime;
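                                /*
                                 * Unless a swapin was explicitly
                                 * requested, weight in the nice value:
                                 * nicer processes get a lower swapin
                                 * priority.
                                 */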
                                if ((p->p_sflag & PS_SWAPINREQ) == 0) {
                                        pri -= kg->kg_nice * 8;
                                }

                                /*
                                 * If this ksegrp is higher priority
                                 * and there is enough space, then select
                                 * this process instead of the previous
                                 * selection.
                                 */
                                if (pri > ppri) {
                                        pp = p;
                                        ppri = pri;
                                }
                        }
                }
                mtx_unlock_spin(&sched_lock);
        }
        sx_sunlock(&allproc_lock);

        /*
         * Nothing to do, back to sleep; swapout_procs() will wake us
         * up if it swaps a process out.
         */
        if ((p = pp) == NULL) {
                tsleep(&proc0, PVM, "sched", maxslp * hz / 2);
                goto loop;
        }
        PROC_LOCK(p);

        /*
         * Another process may be bringing this process in, or may have
         * already brought it in, while we traversed all the threads.
         * Or, this process may even be being swapped out again.
         */
        if (p->p_sflag & (PS_INMEM | PS_SWAPPINGOUT | PS_SWAPPINGIN)) {
                PROC_UNLOCK(p);
                goto loop;
        }

        mtx_lock_spin(&sched_lock);
        p->p_sflag &= ~PS_SWAPINREQ;
        mtx_unlock_spin(&sched_lock);

        /*
         * We would like to bring someone in (only if there is space).
         * [What checks the space?]
         */
        faultin(p);
        PROC_UNLOCK(p);
        mtx_lock_spin(&sched_lock);
        p->p_swtime = 0;
        mtx_unlock_spin(&sched_lock);
        goto loop;
}

#ifndef NO_SWAPPING

/*
 * swap_idle_threshold1 is the guaranteed time, in seconds, that a
 * process stays in memory after being swapped in.
 */
static int swap_idle_threshold1 = 2;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold1, CTLFLAG_RW,
    &swap_idle_threshold1, 0, "Guaranteed swapped in time for a process");

/*
 * swap_idle_threshold2 is the time, in seconds, that a process can be
 * idle before it will be swapped out, if idle swapping is enabled.
 */
static int swap_idle_threshold2 = 10;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2, CTLFLAG_RW,
    &swap_idle_threshold2, 0, "Time before a process will be swapped out");

/*
 * Swapout is driven by the pageout daemon.  Very simple: we find eligible
 * procs and unwire their u-areas.  We try to always "swap" at least one
 * process in case we need the room for a swapin.
 * If any procs have been sleeping/stopped for at least maxslp seconds,
 * they are swapped.  Otherwise, we swap the longest-sleeping or stopped
 * process, if any, and otherwise the longest-resident process.
 */
void
swapout_procs(int action)
{
        struct proc *p;
        struct thread *td;
        struct ksegrp *kg;
        int didswap = 0;

        GIANT_REQUIRED;

retry:
        sx_slock(&allproc_lock);
        FOREACH_PROC_IN_SYSTEM(p) {
                struct vmspace *vm;
                int minslptime = 100000;

                /*
                 * Watch out for a process in creation.  It may have no
                 * address space or lock yet.
                 */
                mtx_lock_spin(&sched_lock);
                if (p->p_state == PRS_NEW) {
                        mtx_unlock_spin(&sched_lock);
                        continue;
                }
                mtx_unlock_spin(&sched_lock);

                /*
                 * An aio daemon switches its address space while running.
                 * Perform a quick check whether a process has P_SYSTEM.
                 */
                if ((p->p_flag & P_SYSTEM) != 0)
                        continue;

                /*
                 * Do not swap out a process that is waiting for VM data
                 * structures, as there is a possible deadlock.  Test this
                 * first as this may block.
                 *
                 * Lock the map until swapout finishes, or a thread of
                 * this process may attempt to alter the map.
                 */
                PROC_LOCK(p);
                vm = p->p_vmspace;
                KASSERT(vm != NULL,
                        ("swapout_procs: a process has no address space"));
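                /*
                 * Take a reference on the vmspace so that it cannot be
                 * destroyed once the proc lock is dropped below.
                 */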
                ++vm->vm_refcnt;
                PROC_UNLOCK(p);
                if (!vm_map_trylock(&vm->vm_map))
                        goto nextproc1;

                PROC_LOCK(p);
                if (p->p_lock != 0 ||
                    (p->p_flag & (P_STOPPED_SINGLE|P_TRACED|P_SYSTEM|P_WEXIT)
                    ) != 0) {
                        goto nextproc2;
                }
                /*
                 * Only aiod changes vmspace; however, it will be
                 * skipped because of the P_SYSTEM check above.
                 */
                if ((p->p_sflag & (PS_INMEM|PS_SWAPPINGOUT|PS_SWAPPINGIN)) != PS_INMEM)
                        goto nextproc2;

                switch (p->p_state) {
                default:
                        /*
                         * Don't swap out processes in any sort of
                         * 'special' state.
                         */
                        break;

                case PRS_NORMAL:
                        mtx_lock_spin(&sched_lock);
                        /*
                         * Do not swap out a realtime process.
                         * Check all the thread groups.
                         */
                        FOREACH_KSEGRP_IN_PROC(p, kg) {
                                if (PRI_IS_REALTIME(kg->kg_pri_class))
                                        goto nextproc;

                                /*
                                 * Guarantee swap_idle_threshold1
                                 * time in memory.
                                 */
                                if (kg->kg_slptime < swap_idle_threshold1)
                                        goto nextproc;

                                /*
                                 * Do not swapout a process if it is
                                 * waiting on a critical event of some
                                 * kind or there is a thread whose
                                 * pageable memory may be accessed.
                                 *
                                 * This could be refined to support
                                 * swapping out a thread.
                                 */
                                FOREACH_THREAD_IN_GROUP(kg, td) {
                                        if ((td->td_priority) < PSOCK ||
                                            !thread_safetoswapout(td))
                                                goto nextproc;
                                }
                                /*
                                 * If the system is under memory stress,
                                 * or if we are swapping
                                 * idle processes >= swap_idle_threshold2,
                                 * then swap the process out.
                                 */
                                if (((action & VM_SWAP_NORMAL) == 0) &&
                                    (((action & VM_SWAP_IDLE) == 0) ||
                                    (kg->kg_slptime < swap_idle_threshold2)))
                                        goto nextproc;

                                if (minslptime > kg->kg_slptime)
                                        minslptime = kg->kg_slptime;
                        }

                        /*
                         * If the process has been asleep for a while and had
                         * most of its pages taken away already, swap it out.
                         */
                        if ((action & VM_SWAP_NORMAL) ||
                                ((action & VM_SWAP_IDLE) &&
                                 (minslptime > swap_idle_threshold2))) {
                                swapout(p);
                                didswap++;
                                mtx_unlock_spin(&sched_lock);
                                PROC_UNLOCK(p);
                                vm_map_unlock(&vm->vm_map);
                                vmspace_free(vm);
                                sx_sunlock(&allproc_lock);
                                goto retry;
                        }
nextproc:
                        mtx_unlock_spin(&sched_lock);
                }
nextproc2:
                PROC_UNLOCK(p);
                vm_map_unlock(&vm->vm_map);
nextproc1:
                vmspace_free(vm);
                continue;
        }
        sx_sunlock(&allproc_lock);
        /*
         * If we swapped something out, and another process needed memory,
         * then wakeup the sched process.
         */
        if (didswap)
                wakeup(&proc0);
}

static void
swapout(struct proc *p)
{
        struct thread *td;

        PROC_LOCK_ASSERT(p, MA_OWNED);
        mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
#if defined(SWAP_DEBUG)
        printf("swapping out %d\n", p->p_pid);
#endif

        /*
         * The states of this process and its threads may have changed
         * by now.  Assuming that there is only one pageout daemon thread,
         * this process should still be in memory.
         */
        KASSERT((p->p_sflag & (PS_INMEM|PS_SWAPPINGOUT|PS_SWAPPINGIN)) == PS_INMEM,
                ("swapout: lost a swapout race?"));

#if defined(INVARIANTS)
        /*
         * Make sure that all threads are safe to be swapped out.
         *
         * Alternatively, we could swap out only safe threads.
         */
        FOREACH_THREAD_IN_PROC(p, td) {
                KASSERT(thread_safetoswapout(td),
                        ("swapout: there is a thread not safe for swapout"));
        }
#endif /* INVARIANTS */

        ++p->p_stats->p_ru.ru_nswap;
        /*
         * Remember the process resident count.
         */
        p->p_vmspace->vm_swrss = vmspace_resident_count(p->p_vmspace);

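        /*
         * Mark the process as no longer resident and in transition;
         * the PS_SWAPPINGOUT flag lets other code avoid racing with
         * this swapout while the locks are dropped below.
         */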
        p->p_sflag &= ~PS_INMEM;
        p->p_sflag |= PS_SWAPPINGOUT;
        PROC_UNLOCK(p);
        FOREACH_THREAD_IN_PROC(p, td)
                TD_SET_SWAPPED(td);
        mtx_unlock_spin(&sched_lock);

        vm_proc_swapout(p);
        FOREACH_THREAD_IN_PROC(p, td)
                vm_thread_swapout(td);

        PROC_LOCK(p);
        mtx_lock_spin(&sched_lock);
        p->p_sflag &= ~PS_SWAPPINGOUT;
        p->p_swtime = 0;
}
#endif /* !NO_SWAPPING */
