FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_glue.c


/*
 * Copyright (c) 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      from: @(#)vm_glue.c     8.6 (Berkeley) 1/5/94
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 *
 * $FreeBSD$
 */

#include "opt_rlimit.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/shm.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>

#include <sys/kernel.h>
#include <sys/unistd.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

#include <sys/user.h>

/*
 * System initialization
 *
 * Note: proc0 from proc.h
 */

static void vm_init_limits __P((void *));
SYSINIT(vm_limits, SI_SUB_VM_CONF, SI_ORDER_FIRST, vm_init_limits, &proc0)

/*
 * THIS MUST BE THE LAST INITIALIZATION ITEM!!!
 *
 * Note: run scheduling should be divorced from the vm system.
 */
static void scheduler __P((void *));
SYSINIT(scheduler, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, scheduler, NULL)


static void swapout __P((struct proc *));

extern char kstack[];

/* vm_map_t upages_map; */

int
kernacc(addr, len, rw)
        caddr_t addr;
        int len, rw;
{
        boolean_t rv;
        vm_offset_t saddr, eaddr;
        vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

        saddr = trunc_page((vm_offset_t)addr);
        eaddr = round_page((vm_offset_t)addr + len);
        vm_map_lock_read(kernel_map);
        rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
        vm_map_unlock_read(kernel_map);
        return (rv == TRUE);
}

int
useracc(addr, len, rw)
        caddr_t addr;
        int len, rw;
{
        boolean_t rv;
        vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;
        vm_map_t map;
        vm_map_entry_t save_hint;

        /*
         * XXX - check separately to disallow access to user area and user
         * page tables - they are in the map.
         *
         * XXX - VM_MAXUSER_ADDRESS is an end address, not a max.  It was once
         * only used (as an end address) in trap.c.  Use it as an end address
         * here too.  This bogusness has spread.  I just fixed where it was
         * used as a max in vm_mmap.c.
         */
        if ((vm_offset_t) addr + len > /* XXX */ VM_MAXUSER_ADDRESS
            || (vm_offset_t) addr + len < (vm_offset_t) addr) {
                return (FALSE);
        }
        map = &curproc->p_vmspace->vm_map;
        vm_map_lock_read(map);
        /*
         * We save the map hint, and restore it.  Useracc appears to distort
         * the map hint unnecessarily.
         */
        save_hint = map->hint;
        rv = vm_map_check_protection(map,
            trunc_page((vm_offset_t)addr), round_page((vm_offset_t)addr + len), prot);
        map->hint = save_hint;
        vm_map_unlock_read(map);

        return (rv == TRUE);
}

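/*
 * A minimal usage sketch (illustrative only; ubuf and ulen are hypothetical
 * names, not part of this file): a driver about to store into a
 * user-supplied buffer can first ask useracc() whether the whole range is
 * writable,
 *
 *      if (useracc((caddr_t)ubuf, ulen, B_WRITE) == 0)
 *              return (EFAULT);
 *
 * B_WRITE selects VM_PROT_WRITE in the check above; kernacc() performs the
 * same protection check against the kernel map instead of the current
 * process map.
 */
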
void
vslock(addr, len)
        caddr_t addr;
        u_int len;
{
        vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr),
            round_page((vm_offset_t)addr + len), FALSE);
}

void
vsunlock(addr, len, dirtied)
        caddr_t addr;
        u_int len;
        int dirtied;
{
#ifdef  lint
        dirtied++;
#endif  /* lint */
        vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr),
            round_page((vm_offset_t)addr + len), TRUE);
}

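/*
 * A minimal pairing sketch (illustrative only; base and len are hypothetical
 * names): code that performs raw I/O directly into user memory is expected
 * to wire the buffer for the duration of the transfer,
 *
 *      vslock(base, len);
 *      ... start the transfer and sleep until it completes ...
 *      vsunlock(base, len, 1);
 *
 * so the backing pages stay resident while the device is using them.  The
 * third argument to vsunlock() indicates whether the buffer was dirtied;
 * as the #ifdef lint above shows, this implementation does not use it.
 */
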
/*
 * Implement fork's actions on an address space.
 * Here we arrange for the address space to be copied or referenced,
 * allocate a user struct (pcb and kernel stack), then call the
 * machine-dependent layer to fill those in and make the new process
 * ready to run.  The new process is set up so that it returns directly
 * to user mode to avoid stack copying and relocation problems.
 */
void
vm_fork(p1, p2, flags)
        register struct proc *p1, *p2;
        int flags;
{
        register struct user *up;

        if (flags & RFMEM) {
                p2->p_vmspace = p1->p_vmspace;
                p1->p_vmspace->vm_refcnt++;
        }

        while ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) {
                vm_pageout_deficit += (UPAGES + VM_INITIAL_PAGEIN);
                VM_WAIT;
        }

        if ((flags & RFMEM) == 0) {
                p2->p_vmspace = vmspace_fork(p1->p_vmspace);

                pmap_pinit2(vmspace_pmap(p2->p_vmspace));

                if (p1->p_vmspace->vm_shm)
                        shmfork(p1, p2);
        }

        pmap_new_proc(p2);

        up = p2->p_addr;

        /*
         * p_stats currently points at fields in the user struct, but not
         * at &u; it points into the area referenced by p_addr.  Copy parts
         * of p_stats and zero the rest of p_stats (the statistics).
         *
         * If p_procsig->ps_refcnt is 1 and p2->p_sigacts is NULL we don't
         * need to share sigacts, so we use up->u_sigacts instead.
         */
        p2->p_stats = &up->u_stats;
        if (p2->p_sigacts == NULL) {
                if (p2->p_procsig->ps_refcnt != 1)
                        printf ("PID:%d NULL sigacts with refcnt not 1!\n",p2->p_pid);
                p2->p_sigacts = &up->u_sigacts;
                up->u_sigacts = *p1->p_sigacts;
        }

        bzero(&up->u_stats.pstat_startzero,
            (unsigned) ((caddr_t) &up->u_stats.pstat_endzero -
                (caddr_t) &up->u_stats.pstat_startzero));
        bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
            ((caddr_t) &up->u_stats.pstat_endcopy -
                (caddr_t) &up->u_stats.pstat_startcopy));


        /*
         * cpu_fork will copy and update the pcb, set up the kernel stack,
         * and make the child ready to run.
         */
        cpu_fork(p1, p2);
}

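/*
 * The two branches above cover the two flavours of fork: with RFMEM clear
 * (ordinary fork(2)) the child receives a copy-on-write duplicate of the
 * parent's address space from vmspace_fork(), while with RFMEM set
 * (rfork(2) with RFMEM and, it is assumed here, vfork(2)) the child merely
 * takes a reference on the parent's vmspace, so parent and child share one
 * address space.
 */
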
/*
 * Set default limits for VM system.
 * Called for proc 0, and then inherited by all others.
 *
 * XXX should probably act directly on proc0.
 */
static void
vm_init_limits(udata)
        void *udata;
{
        register struct proc *p = udata;
        int rss_limit;

        /*
         * Set up the initial limits on process VM. Set the maximum resident
         * set size to be half of (reasonably) available memory.  Since this
         * is a soft limit, it comes into effect only when the system is out
         * of memory - half of main memory helps to favor smaller processes,
         * and reduces thrashing of the object cache.
         */
        p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
        p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
        p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
        p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
        /* limit the limit to no less than 2MB */
        rss_limit = max(cnt.v_free_count, 512);
        p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(rss_limit);
        p->p_rlimit[RLIMIT_RSS].rlim_max = RLIM_INFINITY;
}

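/*
 * Worked example for the RSS default above (assuming the usual 4K page
 * size): max(cnt.v_free_count, 512) keeps the value at 512 pages or more,
 * and ptoa(512) == 512 * 4096 == 2MB, which is the "no less than 2MB" floor
 * mentioned in the comment; on a machine with more free memory the soft
 * RSS limit becomes ptoa(cnt.v_free_count) bytes instead.
 */
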
void
faultin(p)
        struct proc *p;
{
        int s;

        if ((p->p_flag & P_INMEM) == 0) {

                ++p->p_lock;

                pmap_swapin_proc(p);

                s = splhigh();

                if (p->p_stat == SRUN)
                        setrunqueue(p);

                p->p_flag |= P_INMEM;

                /* undo the hold on p_lock taken above */
                --p->p_lock;
                splx(s);

        }
}

/*
 * This swapin algorithm attempts to swap in processes only if there
 * is enough space for them.  Of course, if a process waits for a long
 * time, it will be swapped in anyway.
 */
/* ARGSUSED*/
static void
scheduler(dummy)
        void *dummy;
{
        register struct proc *p;
        register int pri;
        struct proc *pp;
        int ppri;

loop:
        while ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) {
                VM_WAIT;
        }

        pp = NULL;
        ppri = INT_MIN;
        for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
                if (p->p_stat == SRUN &&
                        (p->p_flag & (P_INMEM | P_SWAPPING)) == 0) {

                        pri = p->p_swtime + p->p_slptime;
                        if ((p->p_flag & P_SWAPINREQ) == 0) {
                                pri -= p->p_nice * 8;
                        }

                        /*
                         * If this process is higher priority and there is
                         * enough space, then select this process instead of
                         * the previous selection.
                         */
                        if (pri > ppri) {
                                pp = p;
                                ppri = pri;
                        }
                }
        }

        /*
         * Nothing to do, back to sleep.
         */
        if ((p = pp) == NULL) {
                tsleep(&proc0, PVM, "sched", 0);
                goto loop;
        }
        p->p_flag &= ~P_SWAPINREQ;

        /*
         * We would like to bring someone in (only if there is space).
         */
        faultin(p);
        p->p_swtime = 0;
        goto loop;
}

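/*
 * Selection example (hypothetical numbers, ignoring the nice adjustment and
 * assuming no accrued sleep time): given two runnable, swapped-out
 * processes, one swapped out for 100 seconds (pri = 100) and one for 30
 * seconds (pri = 30), the loop above picks the first, faults its u-area
 * back in via faultin(), and clears its p_swtime before scanning again.
 */
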
#ifndef NO_SWAPPING

#define swappable(p) \
        (((p)->p_lock == 0) && \
                ((p)->p_flag & (P_TRACED|P_NOSWAP|P_SYSTEM|P_INMEM|P_WEXIT|P_PHYSIO|P_SWAPPING)) == P_INMEM)

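/*
 * Reading the swappable() test above: p_lock must be zero (no holds on the
 * process), and ANDing p_flag with the full set of interesting flags must
 * yield exactly P_INMEM, i.e. the process is resident and none of P_TRACED,
 * P_NOSWAP, P_SYSTEM, P_WEXIT, P_PHYSIO or P_SWAPPING is set.
 */
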
/*
 * Swap_idle_threshold1 is the guaranteed swapped-in time for a process.
 */
static int swap_idle_threshold1 = 2;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold1,
        CTLFLAG_RW, &swap_idle_threshold1, 0, "");

/*
 * Swap_idle_threshold2 is the time that a process can be idle before
 * it will be swapped out, if idle swapping is enabled.
 */
static int swap_idle_threshold2 = 10;
SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2,
        CTLFLAG_RW, &swap_idle_threshold2, 0, "");

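/*
 * Both knobs are exported read-write in the vm sysctl tree, so they can be
 * inspected and tuned from userland, e.g. (values here are examples only):
 *
 *      sysctl vm.swap_idle_threshold1 vm.swap_idle_threshold2
 *      sysctl -w vm.swap_idle_threshold2=30
 */
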
/*
 * Swapout is driven by the pageout daemon.  Very simple: we find eligible
 * procs and unwire their u-areas.  We try to always "swap" at least one
 * process in case we need the room for a swapin.
 * If any procs have been sleeping/stopped for at least maxslp seconds,
 * they are swapped.  Else, we swap the longest-sleeping or stopped process,
 * if any, otherwise the longest-resident process.
 */
void
swapout_procs(action)
int action;
{
        register struct proc *p;
        struct proc *outp, *outp2;
        int outpri, outpri2;
        int didswap = 0;

        outp = outp2 = NULL;
        outpri = outpri2 = INT_MIN;
retry:
        for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
                struct vmspace *vm;
                if (!swappable(p))
                        continue;

                vm = p->p_vmspace;

                switch (p->p_stat) {
                default:
                        continue;

                case SSLEEP:
                case SSTOP:
                        /*
                         * do not swap out a realtime process
                         */
                        if (RTP_PRIO_IS_REALTIME(p->p_rtprio.type))
                                continue;

                        /*
                         * Do not swap out a process waiting on a critical
                         * event of some kind.  Also guarantee swap_idle_threshold1
                         * time in memory.
                         */
                        if (((p->p_priority & 0x7f) < PSOCK) ||
                                (p->p_slptime < swap_idle_threshold1))
                                continue;

                        /*
                         * If the system is under memory stress, or if we are
                         * swapping out idle processes and this one has been
                         * idle for at least swap_idle_threshold2 seconds,
                         * then swap the process out.
                         */
                        if (((action & VM_SWAP_NORMAL) == 0) &&
                                (((action & VM_SWAP_IDLE) == 0) ||
                                  (p->p_slptime < swap_idle_threshold2)))
                                continue;

                        ++vm->vm_refcnt;
                        /*
                         * Do not swap out a process that is waiting for VM
                         * data structures; there is a possible deadlock.
                         */
                        if (lockmgr(&vm->vm_map.lock,
                                        LK_EXCLUSIVE | LK_NOWAIT,
                                        (void *)0, curproc)) {
                                vmspace_free(vm);
                                continue;
                        }
                        vm_map_unlock(&vm->vm_map);
                        /*
                         * If the process has been asleep for a while and had
                         * most of its pages taken away already, swap it out.
                         */
                        if ((action & VM_SWAP_NORMAL) ||
                                ((action & VM_SWAP_IDLE) &&
                                 (p->p_slptime > swap_idle_threshold2))) {
                                swapout(p);
                                vmspace_free(vm);
                                didswap++;
                                goto retry;
                        }
                }
        }
        /*
         * If we swapped something out, and another process needed memory,
         * then wake up the scheduler process.
         */
        if (didswap)
                wakeup(&proc0);
}

static void
swapout(p)
        register struct proc *p;
{

#if defined(SWAP_DEBUG)
        printf("swapping out %d\n", p->p_pid);
#endif
        ++p->p_stats->p_ru.ru_nswap;
        /*
         * remember the process resident count
         */
        p->p_vmspace->vm_swrss =
            p->p_vmspace->vm_pmap.pm_stats.resident_count;

        (void) splhigh();
        p->p_flag &= ~P_INMEM;
        p->p_flag |= P_SWAPPING;
        if (p->p_stat == SRUN)
                remrq(p);
        (void) spl0();

        pmap_swapout_proc(p);

        p->p_flag &= ~P_SWAPPING;
        p->p_swtime = 0;
}
#endif /* !NO_SWAPPING */
