
FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_glue.c


    1 /*      $NetBSD: uvm_glue.c,v 1.133.6.1 2009/04/01 00:25:23 snj Exp $   */
    2 
    3 /*
    4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
    5  * Copyright (c) 1991, 1993, The Regents of the University of California.
    6  *
    7  * All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * The Mach Operating System project at Carnegie-Mellon University.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. All advertising materials mentioning features or use of this software
   21  *    must display the following acknowledgement:
   22  *      This product includes software developed by Charles D. Cranor,
   23  *      Washington University, the University of California, Berkeley and
   24  *      its contributors.
   25  * 4. Neither the name of the University nor the names of its contributors
   26  *    may be used to endorse or promote products derived from this software
   27  *    without specific prior written permission.
   28  *
   29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   39  * SUCH DAMAGE.
   40  *
   41  *      @(#)vm_glue.c   8.6 (Berkeley) 1/5/94
   42  * from: Id: uvm_glue.c,v 1.1.2.8 1998/02/07 01:16:54 chs Exp
   43  *
   44  *
   45  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   46  * All rights reserved.
   47  *
   48  * Permission to use, copy, modify and distribute this software and
   49  * its documentation is hereby granted, provided that both the copyright
   50  * notice and this permission notice appear in all copies of the
   51  * software, derivative works or modified versions, and any portions
   52  * thereof, and that both notices appear in supporting documentation.
   53  *
   54  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   55  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   56  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   57  *
   58  * Carnegie Mellon requests users of this software to return to
   59  *
   60  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   61  *  School of Computer Science
   62  *  Carnegie Mellon University
   63  *  Pittsburgh PA 15213-3890
   64  *
   65  * any improvements or extensions that they make and grant Carnegie the
   66  * rights to redistribute these changes.
   67  */
   68 
   69 #include <sys/cdefs.h>
   70 __KERNEL_RCSID(0, "$NetBSD: uvm_glue.c,v 1.133.6.1 2009/04/01 00:25:23 snj Exp $");
   71 
   72 #include "opt_coredump.h"
   73 #include "opt_kgdb.h"
   74 #include "opt_kstack.h"
   75 #include "opt_uvmhist.h"
   76 
   77 /*
   78  * uvm_glue.c: glue functions
   79  */
   80 
   81 #include <sys/param.h>
   82 #include <sys/systm.h>
   83 #include <sys/proc.h>
   84 #include <sys/resourcevar.h>
   85 #include <sys/buf.h>
   86 #include <sys/user.h>
   87 #include <sys/syncobj.h>
   88 #include <sys/cpu.h>
   89 #include <sys/atomic.h>
   90 
   91 #include <uvm/uvm.h>
   92 
   93 /*
   94  * local prototypes
   95  */
   96 
   97 static void uvm_swapout(struct lwp *);
   98 static int uarea_swapin(vaddr_t);
   99 
  100 /*
  101  * XXXCDC: do these really belong here?
  102  */
  103 
  104 /*
  105  * uvm_kernacc: can the kernel access a region of memory
  106  *
  107  * - used only by /dev/kmem driver (mem.c)
  108  */
  109 
  110 bool
  111 uvm_kernacc(void *addr, size_t len, int rw)
  112 {
  113         bool rv;
  114         vaddr_t saddr, eaddr;
  115         vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;
  116 
  117         saddr = trunc_page((vaddr_t)addr);
  118         eaddr = round_page((vaddr_t)addr + len);
  119         vm_map_lock_read(kernel_map);
  120         rv = uvm_map_checkprot(kernel_map, saddr, eaddr, prot);
  121         vm_map_unlock_read(kernel_map);
  122 
  123         return(rv);
  124 }
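
The only caller noted above is the /dev/kmem driver. A minimal, hypothetical sketch of that calling pattern (kmem_read_sketch() is illustrative and not taken from mem.c): the kernel range is validated with uvm_kernacc() before it is copied out.

/*
 * Editor's sketch, not part of the original file: a /dev/kmem-style
 * read path.  The kernel range is checked for readability before
 * uiomove() touches it.
 */
static int
kmem_read_sketch(struct uio *uio)
{
        void *addr = (void *)(vaddr_t)uio->uio_offset;
        size_t len = uio->uio_resid;

        if (!uvm_kernacc(addr, len, B_READ))
                return EFAULT;          /* range not readable by the kernel */
        return uiomove(addr, len, uio); /* copy the bytes out to the caller */
}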
  125 
  126 #ifdef KGDB
  127 /*
  128  * Change protections on kernel pages from addr to addr+len
  129  * (presumably so debugger can plant a breakpoint).
  130  *
  131  * We force the protection change at the pmap level.  If we were
  132  * to use vm_map_protect, a change to allow writing would be applied
  133  * lazily, meaning we would still take a protection fault, something
  134  * we really don't want to do.  It would also fragment the kernel
  135  * map unnecessarily.  We cannot use pmap_protect since it also won't
  136  * enforce a write-enable request.  Using pmap_enter is the only way
  137  * we can ensure the change takes place properly.
  138  */
  139 void
  140 uvm_chgkprot(void *addr, size_t len, int rw)
  141 {
  142         vm_prot_t prot;
  143         paddr_t pa;
  144         vaddr_t sva, eva;
  145 
  146         prot = rw == B_READ ? VM_PROT_READ : VM_PROT_READ|VM_PROT_WRITE;
  147         eva = round_page((vaddr_t)addr + len);
  148         for (sva = trunc_page((vaddr_t)addr); sva < eva; sva += PAGE_SIZE) {
  149                 /*
  150                  * Extract physical address for the page.
  151                  */
  152                 if (pmap_extract(pmap_kernel(), sva, &pa) == false)
  153                         panic("%s: invalid page", __func__);
  154                 pmap_enter(pmap_kernel(), sva, pa, prot, PMAP_WIRED);
  155         }
  156         pmap_update(pmap_kernel());
  157 }
  158 #endif
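
A hypothetical sketch of the debugger use case described above (plant_breakpoint_sketch() is illustrative; the breakpoint instruction is passed in rather than assuming any particular architecture's encoding):

#ifdef KGDB
/*
 * Editor's sketch: make an instruction word writable, patch it, then
 * restore read-only protection, mirroring the intended use of
 * uvm_chgkprot() for planting breakpoints in kernel text.
 */
static void
plant_breakpoint_sketch(uint32_t *insn, uint32_t bkpt_insn)
{
        uvm_chgkprot(insn, sizeof(*insn), B_WRITE); /* allow writing */
        *insn = bkpt_insn;                          /* plant the trap */
        uvm_chgkprot(insn, sizeof(*insn), B_READ);  /* back to read-only */
}
#endif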
  159 
  160 /*
  161  * uvm_vslock: wire user memory for I/O
  162  *
  163  * - called from physio and sys___sysctl
  164  * - XXXCDC: consider nuking this (or making it a macro?)
  165  */
  166 
  167 int
  168 uvm_vslock(struct vmspace *vs, void *addr, size_t len, vm_prot_t access_type)
  169 {
  170         struct vm_map *map;
  171         vaddr_t start, end;
  172         int error;
  173 
  174         map = &vs->vm_map;
  175         start = trunc_page((vaddr_t)addr);
  176         end = round_page((vaddr_t)addr + len);
  177         error = uvm_fault_wire(map, start, end, access_type, 0);
  178         return error;
  179 }
  180 
  181 /*
  182  * uvm_vsunlock: unwire user memory wired by uvm_vslock()
  183  *
  184  * - called from physio and sys___sysctl
  185  * - XXXCDC: consider nuking this (or making it a macro?)
  186  */
  187 
  188 void
  189 uvm_vsunlock(struct vmspace *vs, void *addr, size_t len)
  190 {
  191         uvm_fault_unwire(&vs->vm_map, trunc_page((vaddr_t)addr),
  192                 round_page((vaddr_t)addr + len));
  193 }
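
uvm_vslock() and uvm_vsunlock() are always used as a bracket around the actual transfer. A minimal, hypothetical sketch of that pattern (do_user_io_sketch() is illustrative, not a real caller such as physio):

/*
 * Editor's sketch: wire the user buffer, perform the I/O against the
 * now-resident pages, then unwire exactly the same range.
 */
static int
do_user_io_sketch(struct vmspace *vs, void *uaddr, size_t len)
{
        int error;

        error = uvm_vslock(vs, uaddr, len, VM_PROT_READ | VM_PROT_WRITE);
        if (error)
                return error;           /* could not wire the pages */

        /* ... perform the device I/O against the wired pages ... */

        uvm_vsunlock(vs, uaddr, len);   /* every successful vslock is unwired */
        return 0;
}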
  194 
  195 /*
  196  * uvm_proc_fork: fork a virtual address space
  197  *
  198  * - the address space is copied as per parent map's inherit values
  199  */
  200 void
  201 uvm_proc_fork(struct proc *p1, struct proc *p2, bool shared)
  202 {
  203 
  204         if (shared == true) {
  205                 p2->p_vmspace = NULL;
  206                 uvmspace_share(p1, p2);
  207         } else {
  208                 p2->p_vmspace = uvmspace_fork(p1->p_vmspace);
  209         }
  210 
  211         cpu_proc_fork(p1, p2);
  212 }
  213 
  214 
  215 /*
  216  * uvm_lwp_fork: fork a thread
  217  *
  218  * - a new "user" structure is allocated for the child process
  219  *      [filled in by MD layer...]
  220  * - if specified, the child gets a new user stack described by
  221  *      stack and stacksize
  222  * - NOTE: the kernel stack may be at a different location in the child
  223  *      process, and thus addresses of automatic variables may be invalid
  224  *      after cpu_lwp_fork returns in the child process.  We do nothing here
  225  *      after cpu_lwp_fork returns.
  226  * - XXXCDC: we need a way for this to return a failure value rather
  227  *   than just hang
  228  */
  229 void
  230 uvm_lwp_fork(struct lwp *l1, struct lwp *l2, void *stack, size_t stacksize,
  231     void (*func)(void *), void *arg)
  232 {
  233         int error;
  234 
  235         /*
  236          * Wire down the U-area for the process, which contains the PCB
  237          * and the kernel stack.  Wired state is stored in l->l_flag's
  238          * LW_INMEM bit rather than in the vm_map_entry's wired count
  239          * to prevent kernel_map fragmentation.  If we reused a cached U-area,
  240          * LW_INMEM will already be set and we don't need to do anything.
  241          *
  242          * Note the kernel stack gets read/write accesses right off the bat.
  243          */
  244 
  245         if ((l2->l_flag & LW_INMEM) == 0) {
  246                 vaddr_t uarea = USER_TO_UAREA(l2->l_addr);
  247 
  248                 if ((error = uarea_swapin(uarea)) != 0)
  249                         panic("%s: uvm_fault_wire failed: %d", __func__, error);
  250 #ifdef PMAP_UAREA
  251                 /* Tell the pmap this is a u-area mapping */
  252                 PMAP_UAREA(uarea);
  253 #endif
  254                 l2->l_flag |= LW_INMEM;
  255         }
  256 
  257 #ifdef KSTACK_CHECK_MAGIC
  258         /*
  259          * fill stack with magic number
  260          */
  261         kstack_setup_magic(l2);
  262 #endif
  263 
  264         /*
  265          * cpu_lwp_fork() copies and updates the pcb, and makes the child ready
  266          * to run.  If this is a normal user fork, the child will exit
  267          * directly to user mode via child_return() on its first time
  268          * slice and will not return here.  If this is a kernel thread,
  269          * the specified entry point will be executed.
  270          */
  271         cpu_lwp_fork(l1, l2, stack, stacksize, func, arg);
  272 }
  273 
  274 static int
  275 uarea_swapin(vaddr_t addr)
  276 {
  277 
  278         return uvm_fault_wire(kernel_map, addr, addr + USPACE,
  279             VM_PROT_READ | VM_PROT_WRITE, 0);
  280 }
  281 
  282 static void
  283 uarea_swapout(vaddr_t addr)
  284 {
  285 
  286         uvm_fault_unwire(kernel_map, addr, addr + USPACE);
  287 }
  288 
  289 #ifndef USPACE_ALIGN
  290 #define USPACE_ALIGN    0
  291 #endif
  292 
  293 static pool_cache_t uvm_uarea_cache;
  294 
  295 static int
  296 uarea_ctor(void *arg, void *obj, int flags)
  297 {
  298 
  299         KASSERT((flags & PR_WAITOK) != 0);
  300         return uarea_swapin((vaddr_t)obj);
  301 }
  302 
  303 static void *
  304 uarea_poolpage_alloc(struct pool *pp, int flags)
  305 {
  306 
  307         return (void *)uvm_km_alloc(kernel_map, pp->pr_alloc->pa_pagesz,
  308             USPACE_ALIGN, UVM_KMF_PAGEABLE |
  309             ((flags & PR_WAITOK) != 0 ? UVM_KMF_WAITVA :
  310             (UVM_KMF_NOWAIT | UVM_KMF_TRYLOCK)));
  311 }
  312 
  313 static void
  314 uarea_poolpage_free(struct pool *pp, void *addr)
  315 {
  316 
  317         uvm_km_free(kernel_map, (vaddr_t)addr, pp->pr_alloc->pa_pagesz,
  318             UVM_KMF_PAGEABLE);
  319 }
  320 
  321 static struct pool_allocator uvm_uarea_allocator = {
  322         .pa_alloc = uarea_poolpage_alloc,
  323         .pa_free = uarea_poolpage_free,
  324         .pa_pagesz = USPACE,
  325 };
  326 
  327 void
  328 uvm_uarea_init(void)
  329 {
  330         int flags = PR_NOTOUCH;
  331 
  332         /*
  333          * specify PR_NOALIGN unless the alignment provided by
  334          * the backend (USPACE_ALIGN) is sufficient to provide
  335          * pool page size (USPACE) alignment.
  336          */
  337 
  338         if ((USPACE_ALIGN == 0 && USPACE != PAGE_SIZE) ||
  339             (USPACE_ALIGN % USPACE) != 0) {
  340                 flags |= PR_NOALIGN;
  341         }
  342 
  343         uvm_uarea_cache = pool_cache_init(USPACE, USPACE_ALIGN, 0, flags,
  344             "uarea", &uvm_uarea_allocator, IPL_NONE, uarea_ctor, NULL, NULL);
  345 }
  346 
  347 /*
  348  * uvm_uarea_alloc: allocate a u-area
  349  */
  350 
  351 bool
  352 uvm_uarea_alloc(vaddr_t *uaddrp)
  353 {
  354 
  355         *uaddrp = (vaddr_t)pool_cache_get(uvm_uarea_cache, PR_WAITOK);
  356         return true;
  357 }
  358 
  359 /*
  360  * uvm_uarea_free: free a u-area
  361  */
  362 
  363 void
  364 uvm_uarea_free(vaddr_t uaddr, struct cpu_info *ci)
  365 {
  366 
  367         pool_cache_put(uvm_uarea_cache, (void *)uaddr);
  368 }
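
A hypothetical sketch of how the u-area cache pairs up over an LWP's lifetime (lwp_uarea_sketch() is illustrative and elides the real lwp_create()/uvm_lwp_fork() work; UAREA_TO_USER is assumed to be the inverse of the USER_TO_UAREA conversion used elsewhere in this file):

/*
 * Editor's sketch: a u-area taken from the cache with
 * uvm_uarea_alloc() backs l_addr (the PCB and kernel stack) and is
 * eventually returned with uvm_uarea_free().
 */
static void
lwp_uarea_sketch(struct lwp *l2)
{
        vaddr_t uaddr;

        (void)uvm_uarea_alloc(&uaddr);          /* blocks until memory is available */
        l2->l_addr = UAREA_TO_USER(uaddr);      /* MD view of the u-area */

        /* ... the LWP runs ... */

        uvm_uarea_free(USER_TO_UAREA(l2->l_addr), l2->l_cpu);
        l2->l_addr = NULL;
}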
  369 
  370 /*
  371  * uvm_proc_exit: exit a virtual address space
  372  *
  373  * - borrow proc0's address space because freeing the vmspace
  374  *   of the dead process may block.
  375  */
  376 
  377 void
  378 uvm_proc_exit(struct proc *p)
  379 {
  380         struct lwp *l = curlwp; /* XXX */
  381         struct vmspace *ovm;
  382 
  383         KASSERT(p == l->l_proc);
  384         ovm = p->p_vmspace;
  385 
  386         /*
  387          * borrow proc0's address space.
  388          */
  389         KPREEMPT_DISABLE(l);
  390         pmap_deactivate(l);
  391         p->p_vmspace = proc0.p_vmspace;
  392         pmap_activate(l);
  393         KPREEMPT_ENABLE(l);
  394 
  395         uvmspace_free(ovm);
  396 }
  397 
  398 void
  399 uvm_lwp_exit(struct lwp *l)
  400 {
  401         vaddr_t va = USER_TO_UAREA(l->l_addr);
  402 
  403         l->l_flag &= ~LW_INMEM;
  404         uvm_uarea_free(va, l->l_cpu);
  405         l->l_addr = NULL;
  406 }
  407 
  408 /*
  409  * uvm_init_limits: init per-process VM limits
  410  *
  411  * - called for process 0 and then inherited by all others.
  412  */
  413 
  414 void
  415 uvm_init_limits(struct proc *p)
  416 {
  417 
  418         /*
  419          * Set up the initial limits on process VM.  Set the maximum
  420          * resident set size to be all of (reasonably) available memory.
  421          * This causes any single, large process to start random page
  422          * replacement once it fills memory.
  423          */
  424 
  425         p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
  426         p->p_rlimit[RLIMIT_STACK].rlim_max = maxsmap;
  427         p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
  428         p->p_rlimit[RLIMIT_DATA].rlim_max = maxdmap;
  429         p->p_rlimit[RLIMIT_AS].rlim_cur = RLIM_INFINITY;
  430         p->p_rlimit[RLIMIT_AS].rlim_max = RLIM_INFINITY;
  431         p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(uvmexp.free);
  432 }
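
A hypothetical sketch of how a later allocation path consults the soft limits seeded here (check_data_limit_sketch() is illustrative; the real enforcement lives in the uvm_map/obreak code paths):

/*
 * Editor's sketch: refuse a data-segment growth request that would
 * push the process past its soft RLIMIT_DATA value.
 */
static int
check_data_limit_sketch(struct proc *p, vsize_t newsize)
{
        if (newsize > p->p_rlimit[RLIMIT_DATA].rlim_cur)
                return ENOMEM;          /* would exceed the soft data limit */
        return 0;                       /* growth stays within the limit */
}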
  433 
  434 #ifdef DEBUG
  435 int     enableswap = 1;
  436 int     swapdebug = 0;
  437 #define SDB_FOLLOW      1
  438 #define SDB_SWAPIN      2
  439 #define SDB_SWAPOUT     4
  440 #endif
  441 
  442 /*
  443  * uvm_swapin: swap in an lwp's u-area.
  444  *
  445  * - must be called with the LWP's swap lock held.
  446  * - naturally, must not be called with l == curlwp
  447  */
  448 
  449 void
  450 uvm_swapin(struct lwp *l)
  451 {
  452         int error;
  453 
  454         /* XXXSMP notyet KASSERT(mutex_owned(&l->l_swaplock)); */
  455         KASSERT(l != curlwp);
  456 
  457         error = uarea_swapin(USER_TO_UAREA(l->l_addr));
  458         if (error) {
  459                 panic("%s: rewiring stack failed: %d", __func__, error);
  460         }
  461 
  462         /*
  463          * Some architectures need to be notified when the user area has
  464          * moved to new physical page(s) (e.g.  see mips/mips/vm_machdep.c).
  465          */
  466         cpu_swapin(l);
  467         lwp_lock(l);
  468         if (l->l_stat == LSRUN)
  469                 sched_enqueue(l, false);
  470         l->l_flag |= LW_INMEM;
  471         l->l_swtime = 0;
  472         lwp_unlock(l);
  473         ++uvmexp.swapins;
  474 }
  475 
  476 /*
  477  * uvm_kick_scheduler: kick the scheduler into action if not running.
  478  *
  479  * - called when swapped out processes have been awoken.
  480  */
  481 
  482 void
  483 uvm_kick_scheduler(void)
  484 {
  485 
  486         if (uvm.swap_running == false)
  487                 return;
  488 
  489         mutex_enter(&uvm_scheduler_mutex);
  490         uvm.scheduler_kicked = true;
  491         cv_signal(&uvm.scheduler_cv);
  492         mutex_exit(&uvm_scheduler_mutex);
  493 }
  494 
  495 /*
  496  * uvm_scheduler: process zero main loop
  497  *
  498  * - attempt to swap in every swapped-out, runnable process in order of
  499  *      priority.
  500  * - if not enough memory, wake the pagedaemon and let it clear space.
  501  */
  502 
  503 void
  504 uvm_scheduler(void)
  505 {
  506         struct lwp *l, *ll;
  507         int pri;
  508         int ppri;
  509 
  510         l = curlwp;
  511         lwp_lock(l);
  512         l->l_priority = PRI_VM;
  513         l->l_class = SCHED_FIFO;
  514         lwp_unlock(l);
  515 
  516         for (;;) {
  517 #ifdef DEBUG
  518                 mutex_enter(&uvm_scheduler_mutex);
  519                 while (!enableswap)
  520                         cv_wait(&uvm.scheduler_cv, &uvm_scheduler_mutex);
  521                 mutex_exit(&uvm_scheduler_mutex);
  522 #endif
  523                 ll = NULL;              /* process to choose */
  524                 ppri = INT_MIN;         /* its priority */
  525 
  526                 mutex_enter(proc_lock);
  527                 LIST_FOREACH(l, &alllwp, l_list) {
  528                         /* is it a runnable swapped out process? */
  529                         if (l->l_stat == LSRUN && !(l->l_flag & LW_INMEM)) {
  530                                 pri = l->l_swtime + l->l_slptime -
  531                                     (l->l_proc->p_nice - NZERO) * 8;
  532                                 if (pri > ppri) {   /* higher priority? */
  533                                         ll = l;
  534                                         ppri = pri;
  535                                 }
  536                         }
  537                 }
  538 #ifdef DEBUG
  539                 if (swapdebug & SDB_FOLLOW)
  540                         printf("%s: running, procp %p pri %d\n", __func__, ll,
  541                             ppri);
  542 #endif
  543                 /*
  544                  * Nothing to do, back to sleep
  545                  */
  546                 if ((l = ll) == NULL) {
  547                         mutex_exit(proc_lock);
  548                         mutex_enter(&uvm_scheduler_mutex);
  549                         if (uvm.scheduler_kicked == false)
  550                                 cv_wait(&uvm.scheduler_cv,
  551                                     &uvm_scheduler_mutex);
  552                         uvm.scheduler_kicked = false;
  553                         mutex_exit(&uvm_scheduler_mutex);
  554                         continue;
  555                 }
  556 
  557                 /*
  558                  * we have found a swapped-out process which we would like
  559                  * to bring back in.
  560                  *
  561                  * XXX: this part is really bogus because we could deadlock
  562                  * on memory despite our feeble check
  563                  */
  564                 if (uvmexp.free > atop(USPACE)) {
  565 #ifdef DEBUG
  566                         if (swapdebug & SDB_SWAPIN)
  567                                 printf("swapin: pid %d(%s)@%p, pri %d "
  568                                     "free %d\n", l->l_proc->p_pid,
  569                                     l->l_proc->p_comm, l->l_addr, ppri,
  570                                     uvmexp.free);
  571 #endif
  572                         mutex_enter(&l->l_swaplock);
  573                         mutex_exit(proc_lock);
  574                         uvm_swapin(l);
  575                         mutex_exit(&l->l_swaplock);
  576                         continue;
  577                 } else {
  578                         /*
  579                          * not enough memory, jab the pageout daemon and
  580                          * wait until the coast is clear
  581                          */
  582                         mutex_exit(proc_lock);
  583 #ifdef DEBUG
  584                         if (swapdebug & SDB_FOLLOW)
  585                                 printf("%s: no room for pid %d(%s),"
  586                                     " free %d\n", __func__, l->l_proc->p_pid,
  587                                     l->l_proc->p_comm, uvmexp.free);
  588 #endif
  589                         uvm_wait("schedpwait");
  590 #ifdef DEBUG
  591                         if (swapdebug & SDB_FOLLOW)
  592                                 printf("%s: room again, free %d\n", __func__,
  593                                     uvmexp.free);
  594 #endif
  595                 }
  596         }
  597 }
  598 
  599 /*
  600  * swappable: is LWP "l" swappable?
  601  */
  602 
  603 static bool
  604 swappable(struct lwp *l)
  605 {
  606 
  607         if ((l->l_flag & (LW_INMEM|LW_SYSTEM|LW_WEXIT)) != LW_INMEM)
  608                 return false;
  609         if ((l->l_pflag & LP_RUNNING) != 0)
  610                 return false;
  611         if (l->l_holdcnt != 0)
  612                 return false;
  613         if (l->l_class != SCHED_OTHER)
  614                 return false;
  615         if (l->l_syncobj == &rw_syncobj || l->l_syncobj == &mutex_syncobj)
  616                 return false;
  617         if (l->l_proc->p_stat != SACTIVE && l->l_proc->p_stat != SSTOP)
  618                 return false;
  619         return true;
  620 }
  621 
  622 /*
  623  * swapout_threads: find threads that can be swapped and unwire their
  624  *      u-areas.
  625  *
  626  * - called by the pagedaemon
  627  * - try to swap out at least one process
  628  * - processes that are sleeping or stopped for maxslp or more seconds
  629  *   are swapped... otherwise the longest-sleeping or stopped process
  630  *   is swapped, otherwise the longest resident process...
  631  */
  632 
  633 void
  634 uvm_swapout_threads(void)
  635 {
  636         struct lwp *l;
  637         struct lwp *outl, *outl2;
  638         int outpri, outpri2;
  639         int didswap = 0;
  640         extern int maxslp;
  641         bool gotit;
  642 
  643         /* XXXCDC: should move off to uvmexp. or uvm., also in uvm_meter */
  644 
  645 #ifdef DEBUG
  646         if (!enableswap)
  647                 return;
  648 #endif
  649 
  650         /*
  651          * outl/outpri  : stop/sleep thread with largest sleeptime < maxslp
  652          * outl2/outpri2: the longest resident thread (its swap time)
  653          */
  654         outl = outl2 = NULL;
  655         outpri = outpri2 = 0;
  656 
  657  restart:
  658         mutex_enter(proc_lock);
  659         LIST_FOREACH(l, &alllwp, l_list) {
  660                 KASSERT(l->l_proc != NULL);
  661                 if (!mutex_tryenter(&l->l_swaplock))
  662                         continue;
  663                 if (!swappable(l)) {
  664                         mutex_exit(&l->l_swaplock);
  665                         continue;
  666                 }
  667                 switch (l->l_stat) {
  668                 case LSONPROC:
  669                         break;
  670 
  671                 case LSRUN:
  672                         if (l->l_swtime > outpri2) {
  673                                 outl2 = l;
  674                                 outpri2 = l->l_swtime;
  675                         }
  676                         break;
  677 
  678                 case LSSLEEP:
  679                 case LSSTOP:
  680                         if (l->l_slptime >= maxslp) {
  681                                 mutex_exit(proc_lock);
  682                                 uvm_swapout(l);
  683                                 /*
  684                                  * Locking in the wrong direction -
  685                                  * try to prevent the LWP from exiting.
  686                                  */
  687                                 gotit = mutex_tryenter(proc_lock);
  688                                 mutex_exit(&l->l_swaplock);
  689                                 didswap++;
  690                                 if (!gotit)
  691                                         goto restart;
  692                                 continue;
  693                         } else if (l->l_slptime > outpri) {
  694                                 outl = l;
  695                                 outpri = l->l_slptime;
  696                         }
  697                         break;
  698                 }
  699                 mutex_exit(&l->l_swaplock);
  700         }
  701 
  702         /*
  703          * If we didn't get rid of any real duds, toss out the next most
  704          * likely sleeping/stopped or running candidate.  We only do this
  705          * if we are really low on memory since we don't gain much by doing
  706          * it (USPACE bytes).
  707          */
  708         if (didswap == 0 && uvmexp.free <= atop(round_page(USPACE))) {
  709                 if ((l = outl) == NULL)
  710                         l = outl2;
  711 #ifdef DEBUG
  712                 if (swapdebug & SDB_SWAPOUT)
  713                         printf("%s: no duds, try procp %p\n", __func__, l);
  714 #endif
  715                 if (l) {
  716                         mutex_enter(&l->l_swaplock);
  717                         mutex_exit(proc_lock);
  718                         if (swappable(l))
  719                                 uvm_swapout(l);
  720                         mutex_exit(&l->l_swaplock);
  721                         return;
  722                 }
  723         }
  724 
  725         mutex_exit(proc_lock);
  726 }
  727 
  728 /*
  729  * uvm_swapout: swap out lwp "l"
  730  *
  731  * - currently "swapout" means "unwire U-area" and "pmap_collect()"
  732  *   the pmap.
  733  * - must be called with l->l_swaplock held.
  734  * - XXXCDC: should deactivate all process' private anonymous memory
  735  */
  736 
  737 static void
  738 uvm_swapout(struct lwp *l)
  739 {
  740         struct vm_map *map;
  741 
  742         KASSERT(mutex_owned(&l->l_swaplock));
  743 
  744 #ifdef DEBUG
  745         if (swapdebug & SDB_SWAPOUT)
  746                 printf("%s: lid %d.%d(%s)@%p, stat %x pri %d free %d\n",
  747                    __func__, l->l_proc->p_pid, l->l_lid, l->l_proc->p_comm,
  748                    l->l_addr, l->l_stat, l->l_slptime, uvmexp.free);
  749 #endif
  750 
  751         /*
  752          * Mark it as (potentially) swapped out.
  753          */
  754         lwp_lock(l);
  755         if (!swappable(l)) {
  756                 KDASSERT(l->l_cpu != curcpu());
  757                 lwp_unlock(l);
  758                 return;
  759         }
  760         l->l_flag &= ~LW_INMEM;
  761         l->l_swtime = 0;
  762         if (l->l_stat == LSRUN)
  763                 sched_dequeue(l);
  764         lwp_unlock(l);
  765         l->l_ru.ru_nswap++;
  766         ++uvmexp.swapouts;
  767 
  768         /*
  769          * Do any machine-specific actions necessary before swapout.
  770          * This can include saving floating point state, etc.
  771          */
  772         cpu_swapout(l);
  773 
  774         /*
  775          * Unwire the to-be-swapped process's user struct and kernel stack.
  776          */
  777         uarea_swapout(USER_TO_UAREA(l->l_addr));
  778         map = &l->l_proc->p_vmspace->vm_map;
  779         if (vm_map_lock_try(map)) {
  780                 pmap_collect(vm_map_pmap(map));
  781                 vm_map_unlock(map);
  782         }
  783 }
  784 
  785 /*
  786  * uvm_lwp_hold: prevent lwp "l" from being swapped out, and bring it
  787  * back into memory if it is currently swapped out.
  788  */
  789  
  790 void
  791 uvm_lwp_hold(struct lwp *l)
  792 {
  793 
  794         if (l == curlwp) {
  795                 atomic_inc_uint(&l->l_holdcnt);
  796         } else {
  797                 mutex_enter(&l->l_swaplock);
  798                 if (atomic_inc_uint_nv(&l->l_holdcnt) == 1 &&
  799                     (l->l_flag & LW_INMEM) == 0)
  800                         uvm_swapin(l);
  801                 mutex_exit(&l->l_swaplock);
  802         }
  803 }
  804 
  805 /*
  806  * uvm_lwp_rele: release a hold on lwp "l".  When the holdcount
  807  * drops to zero, it is eligible to be swapped out.
  808  */
  809  
  810 void
  811 uvm_lwp_rele(struct lwp *l)
  812 {
  813 
  814         KASSERT(l->l_holdcnt != 0);
  815 
  816         atomic_dec_uint(&l->l_holdcnt);
  817 }
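
A hypothetical sketch of the hold/release bracket (inspect_lwp_sketch() is illustrative), as used by code that must touch another LWP's u-area and needs it resident for the duration:

/*
 * Editor's sketch: take a hold so the target LWP's u-area stays in
 * (or is swapped back into) memory, do the access, drop the hold.
 */
static void
inspect_lwp_sketch(struct lwp *l)
{
        uvm_lwp_hold(l);        /* swaps the u-area in if it was swapped out */

        /* ... safely read l->l_addr / the PCB here ... */

        uvm_lwp_rele(l);        /* holdcnt back to 0: swappable again */
}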
  818 
  819 #ifdef COREDUMP
  820 /*
  821  * uvm_coredump_walkmap: walk a process's map for the purpose of dumping
  822  * a core file.
  823  */
  824 
  825 int
  826 uvm_coredump_walkmap(struct proc *p, void *iocookie,
  827     int (*func)(struct proc *, void *, struct uvm_coredump_state *),
  828     void *cookie)
  829 {
  830         struct uvm_coredump_state state;
  831         struct vmspace *vm = p->p_vmspace;
  832         struct vm_map *map = &vm->vm_map;
  833         struct vm_map_entry *entry;
  834         int error;
  835 
  836         entry = NULL;
  837         vm_map_lock_read(map);
  838         state.end = 0;
  839         for (;;) {
  840                 if (entry == NULL)
  841                         entry = map->header.next;
  842                 else if (!uvm_map_lookup_entry(map, state.end, &entry))
  843                         entry = entry->next;
  844                 if (entry == &map->header)
  845                         break;
  846 
  847                 state.cookie = cookie;
  848                 if (state.end > entry->start) {
  849                         state.start = state.end;
  850                 } else {
  851                         state.start = entry->start;
  852                 }
  853                 state.realend = entry->end;
  854                 state.end = entry->end;
  855                 state.prot = entry->protection;
  856                 state.flags = 0;
  857 
  858                 /*
  859                  * Dump the region unless one of the following is true:
  860                  *
  861                  * (1) the region has neither object nor amap behind it
  862                  *     (ie. it has never been accessed).
  863                  *
  864                  * (2) the region has no amap and is read-only
  865                  *     (eg. an executable text section).
  866                  *
  867                  * (3) the region's object is a device.
  868                  *
  869                  * (4) the region is unreadable by the process.
  870                  */
  871 
  872                 KASSERT(!UVM_ET_ISSUBMAP(entry));
  873                 KASSERT(state.start < VM_MAXUSER_ADDRESS);
  874                 KASSERT(state.end <= VM_MAXUSER_ADDRESS);
  875                 if (entry->object.uvm_obj == NULL &&
  876                     entry->aref.ar_amap == NULL) {
  877                         state.realend = state.start;
  878                 } else if ((entry->protection & VM_PROT_WRITE) == 0 &&
  879                     entry->aref.ar_amap == NULL) {
  880                         state.realend = state.start;
  881                 } else if (entry->object.uvm_obj != NULL &&
  882                     UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
  883                         state.realend = state.start;
  884                 } else if ((entry->protection & VM_PROT_READ) == 0) {
  885                         state.realend = state.start;
  886                 } else {
  887                         if (state.start >= (vaddr_t)vm->vm_maxsaddr)
  888                                 state.flags |= UVM_COREDUMP_STACK;
  889 
  890                         /*
  891                          * If this is an anonymous entry, only dump instantiated
  892                          * pages.
  893                          */
  894                         if (entry->object.uvm_obj == NULL) {
  895                                 vaddr_t end;
  896 
  897                                 amap_lock(entry->aref.ar_amap);
  898                                 for (end = state.start;
  899                                      end < state.end; end += PAGE_SIZE) {
  900                                         struct vm_anon *anon;
  901                                         anon = amap_lookup(&entry->aref,
  902                                             end - entry->start);
  903                                         /*
  904                                          * If we have already encountered an
  905                                          * uninstantiated page, stop at the
  906                                          * first instantiated page.
  907                                          */
  908                                         if (anon != NULL &&
  909                                             state.realend != state.end) {
  910                                                 state.end = end;
  911                                                 break;
  912                                         }
  913 
  914                                         /*
  915                                          * If this page is the first
  916                                          * uninstantiated page, mark this as
  917                                          * the real ending point.  Continue to
  918                                          * count uninstantiated pages.
  919                                          */
  920                                         if (anon == NULL &&
  921                                             state.realend == state.end) {
  922                                                 state.realend = end;
  923                                         }
  924                                 }
  925                                 amap_unlock(entry->aref.ar_amap);
  926                         }
  927                 }
  928                 
  929 
  930                 vm_map_unlock_read(map);
  931                 error = (*func)(p, iocookie, &state);
  932                 if (error)
  933                         return (error);
  934                 vm_map_lock_read(map);
  935         }
  936         vm_map_unlock_read(map);
  937 
  938         return (0);
  939 }
  940 #endif /* COREDUMP */
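
A hypothetical sketch of the callback shape expected by uvm_coredump_walkmap() (coredump_seg_sketch() is illustrative; the real callbacks in the core dump code write a segment header and then the contents of [state->start, state->realend)):

#ifdef COREDUMP
/*
 * Editor's sketch: a walker callback.  A region whose realend equals
 * its start was classified above as having nothing worth dumping.
 */
static int
coredump_seg_sketch(struct proc *p, void *iocookie,
    struct uvm_coredump_state *state)
{
        if (state->realend == state->start)
                return 0;       /* nothing to write for this region */

        /* ... emit a header, then state->realend - state->start bytes ... */
        return 0;
}
#endif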

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.