FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_glue.c
1 /*
2 * Copyright (c) 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * The Mach Operating System project at Carnegie-Mellon University.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * from: @(#)vm_glue.c 8.6 (Berkeley) 1/5/94
37 *
38 *
39 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
40 * All rights reserved.
41 *
42 * Permission to use, copy, modify and distribute this software and
43 * its documentation is hereby granted, provided that both the copyright
44 * notice and this permission notice appear in all copies of the
45 * software, derivative works or modified versions, and any portions
46 * thereof, and that both notices appear in supporting documentation.
47 *
48 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
49 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
50 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
51 *
52 * Carnegie Mellon requests users of this software to return to
53 *
54 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
55 * School of Computer Science
56 * Carnegie Mellon University
57 * Pittsburgh PA 15213-3890
58 *
59 * any improvements or extensions that they make and grant Carnegie the
60 * rights to redistribute these changes.
61 *
62 * $FreeBSD: src/sys/vm/vm_glue.c,v 1.55.2.4 1999/09/05 08:24:24 peter Exp $
63 */
64
65 #include "opt_rlimit.h"
66
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/proc.h>
70 #include <sys/resourcevar.h>
71 #include <sys/buf.h>
72 #include <sys/shm.h>
73 #include <sys/vmmeter.h>
74
75 #include <sys/kernel.h>
76 #include <sys/dkstat.h>
77
78 #include <vm/vm.h>
79 #include <vm/vm_param.h>
80 #include <vm/vm_inherit.h>
81 #include <vm/vm_prot.h>
82 #include <vm/lock.h>
83 #include <vm/pmap.h>
84 #include <vm/vm_map.h>
85 #include <vm/vm_page.h>
86 #include <vm/vm_pageout.h>
87 #include <vm/vm_kern.h>
88 #include <vm/vm_extern.h>
89 #include <vm/vm_object.h>
90 #include <vm/vm_pager.h>
91
92 #include <sys/user.h>
93
94 /*
95 * System initialization
96 *
97 * Note: proc0 from proc.h
98 */
99
100 static void vm_init_limits __P((void *));
101 SYSINIT(vm_limits, SI_SUB_VM_CONF, SI_ORDER_FIRST, vm_init_limits, &proc0)
102
103 /*
104 * THIS MUST BE THE LAST INITIALIZATION ITEM!!!
105 *
106 * Note: run scheduling should be divorced from the vm system.
107 */
108 static void scheduler __P((void *));
109 SYSINIT(scheduler, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, scheduler, NULL)
110
111
112 static void swapout __P((struct proc *));
113
114 extern char kstack[];
115
116 /* vm_map_t upages_map; */
117
118 int
119 kernacc(addr, len, rw)
120 caddr_t addr;
121 int len, rw;
122 {
123 boolean_t rv;
124 vm_offset_t saddr, eaddr;
125 vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;
126
127 saddr = trunc_page(addr);
128 eaddr = round_page(addr + len);
129 vm_map_lock_read(kernel_map);
130 rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
131 vm_map_unlock_read(kernel_map);
132 return (rv == TRUE);
133 }
134
135 int
136 useracc(addr, len, rw)
137 caddr_t addr;
138 int len, rw;
139 {
140 boolean_t rv;
141 vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;
142 vm_map_t map;
143 vm_map_entry_t save_hint;
144
145 /*
146 * XXX - check separately to disallow access to user area and user
147 * page tables - they are in the map.
148 *
149 * XXX - VM_MAXUSER_ADDRESS is an end address, not a max. It was once
150 * only used (as an end address) in trap.c. Use it as an end address
151 * here too. This bogusness has spread. I just fixed where it was
152 * used as a max in vm_mmap.c.
153 */
154 if ((vm_offset_t) addr + len > /* XXX */ VM_MAXUSER_ADDRESS
155 || (vm_offset_t) addr + len < (vm_offset_t) addr) {
156 return (FALSE);
157 }
158 map = &curproc->p_vmspace->vm_map;
159 vm_map_lock_read(map);
160 /*
161 * We save the map hint, and restore it. Useracc appears to distort
162 * the map hint unnecessarily.
163 */
164 save_hint = map->hint;
165 rv = vm_map_check_protection(map,
166 trunc_page(addr), round_page(addr + len), prot);
167 map->hint = save_hint;
168 vm_map_unlock_read(map);
169
170 return (rv == TRUE);
171 }
172
173 void
174 vslock(addr, len)
175 caddr_t addr;
176 u_int len;
177 {
178 vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
179 round_page(addr + len), FALSE);
180 }
181
182 void
183 vsunlock(addr, len, dirtied)
184 caddr_t addr;
185 u_int len;
186 int dirtied;
187 {
188 #ifdef lint
189 dirtied++;
190 #endif /* lint */
191 vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
192 round_page(addr + len), TRUE);
193 }
194
195 /*
196 * Implement fork's actions on an address space.
197 * Here we arrange for the address space to be copied or referenced,
198 * allocate a user struct (pcb and kernel stack), then call the
199 * machine-dependent layer to fill those in and make the new process
200 * ready to run.
201 * NOTE: the kernel stack may be at a different location in the child
202 * process, and thus addresses of automatic variables may be invalid
203 * after cpu_fork returns in the child process. We do nothing here
204 * after cpu_fork returns.
205 */
206 int
207 vm_fork(p1, p2)
208 register struct proc *p1, *p2;
209 {
210 register struct user *up;
211 int i;
212 pmap_t pvp;
213 vm_object_t upobj;
214
215 while ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) {
216 VM_WAIT;
217 }
218
219 p2->p_vmspace = vmspace_fork(p1->p_vmspace);
220
221 if (p1->p_vmspace->vm_shm)
222 shmfork(p1, p2);
223
224 pmap_new_proc(p2);
225
226 up = p2->p_addr;
227
228 /*
229 * p_stats and p_sigacts currently point at fields in the user struct
230 * but not at &u, instead at p_addr. Copy p_sigacts and parts of
231 * p_stats; zero the rest of p_stats (statistics).
232 */
233 p2->p_stats = &up->u_stats;
234 p2->p_sigacts = &up->u_sigacts;
235 up->u_sigacts = *p1->p_sigacts;
236 bzero(&up->u_stats.pstat_startzero,
237 (unsigned) ((caddr_t) &up->u_stats.pstat_endzero -
238 (caddr_t) &up->u_stats.pstat_startzero));
239 bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
240 ((caddr_t) &up->u_stats.pstat_endcopy -
241 (caddr_t) &up->u_stats.pstat_startcopy));
242
243
244 /*
245 * cpu_fork will copy and update the kernel stack and pcb, and make
246 * the child ready to run. It marks the child so that it can return
247 * differently than the parent. It returns twice, once in the parent
248 * process and once in the child.
249 */
250 return (cpu_fork(p1, p2));
251 }
252
253 /*
254 * Set default limits for VM system.
255 * Called for proc 0, and then inherited by all others.
256 *
257 * XXX should probably act directly on proc0.
258 */
259 static void
260 vm_init_limits(udata)
261 void *udata;
262 {
263 register struct proc *p = udata;
264 int rss_limit;
265
266 /*
267 * Set up the initial limits on process VM. Set the maximum resident
268 * set size to be half of (reasonably) available memory. Since this
269 * is a soft limit, it comes into effect only when the system is out
270 * of memory - half of main memory helps to favor smaller processes,
271 * and reduces thrashing of the object cache.
272 */
273 p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
274 p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
275 p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
276 p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
277 /* limit the limit to no less than 2MB */
278 rss_limit = max(cnt.v_free_count, 512);
279 p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(rss_limit);
280 p->p_rlimit[RLIMIT_RSS].rlim_max = RLIM_INFINITY;
281 }
282
283 void
284 faultin(p)
285 struct proc *p;
286 {
287 vm_offset_t i;
288 int s;
289
290 if ((p->p_flag & P_INMEM) == 0) {
291
292 ++p->p_lock;
293
294 pmap_swapin_proc(p);
295
296 s = splhigh();
297
298 if (p->p_stat == SRUN)
299 setrunqueue(p);
300
301 p->p_flag |= P_INMEM;
302
303 /* undo the effect of setting SLOCK above */
304 --p->p_lock;
305 splx(s);
306
307 }
308 }
309
310 /*
311 * This swapin algorithm attempts to swap-in processes only if there
312 * is enough space for them. Of course, if a process waits for a long
313 * time, it will be swapped in anyway.
314 */
315 /* ARGSUSED*/
316 static void
317 scheduler(dummy)
318 void *dummy;
319 {
320 register struct proc *p;
321 register int pri;
322 struct proc *pp;
323 int ppri;
324
325 loop:
326 while ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) {
327 VM_WAIT;
328 }
329
330 pp = NULL;
331 ppri = INT_MIN;
332 for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
333 if (p->p_stat == SRUN &&
334 (p->p_flag & (P_INMEM | P_SWAPPING)) == 0) {
335 int mempri;
336
337 pri = p->p_swtime + p->p_slptime;
338 if ((p->p_flag & P_SWAPINREQ) == 0) {
339 pri -= p->p_nice * 8;
340 }
341 mempri = pri > 0 ? pri : 0;
342 /*
343 * if this process is higher priority and there is
344 * enough space, then select this process instead of
345 * the previous selection.
346 */
347 if (pri > ppri) {
348 pp = p;
349 ppri = pri;
350 }
351 }
352 }
353
354 /*
355 * Nothing to do, back to sleep.
356 */
357 if ((p = pp) == NULL) {
358 tsleep(&proc0, PVM, "sched", 0);
359 goto loop;
360 }
361 p->p_flag &= ~P_SWAPINREQ;
362
363 /*
364 * We would like to bring someone in. (only if there is space).
365 */
366 faultin(p);
367 p->p_swtime = 0;
368 goto loop;
369 }
370
371 #ifndef NO_SWAPPING
372
/*
 * A process is swappable when its p_lock count is zero and, of the flags
 * tested here, P_INMEM is the only one set.
 */
#define	swappable(p)							\
	((p)->p_lock == 0 &&						\
	 ((p)->p_flag & (P_INMEM | P_SWAPPING | P_TRACED | P_NOSWAP |	\
	    P_SYSTEM | P_WEXIT | P_PHYSIO)) == P_INMEM)
376
377 /*
378 * Swapout is driven by the pageout daemon. Very simple, we find eligible
379 * procs and unwire their u-areas. We try to always "swap" at least one
380 * process in case we need the room for a swapin.
381 * If any procs have been sleeping/stopped for at least maxslp seconds,
382 * they are swapped. Else, we swap the longest-sleeping or stopped process,
383 * if any, otherwise the longest-resident process.
384 */
385 void
386 swapout_procs()
387 {
388 register struct proc *p;
389 struct proc *outp, *outp2;
390 int outpri, outpri2;
391 int didswap = 0;
392
393 outp = outp2 = NULL;
394 outpri = outpri2 = INT_MIN;
395 retry:
396 for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
397 struct vmspace *vm;
398 if (!swappable(p))
399 continue;
400
401 vm = p->p_vmspace;
402
403 switch (p->p_stat) {
404 default:
405 continue;
406
407 case SSLEEP:
408 case SSTOP:
409 /*
410 * do not swapout a realtime process
411 */
412 if (p->p_rtprio.type == RTP_PRIO_REALTIME)
413 continue;
414
415 /*
416 * do not swapout a process waiting on a critical
417 * event of some kind
418 */
419 if (((p->p_priority & 0x7f) < PSOCK) ||
420 (p->p_slptime <= 10))
421 continue;
422
423 ++vm->vm_refcnt;
424 vm_map_reference(&vm->vm_map);
425 /*
426 * do not swapout a process that is waiting for VM
427 * datastructures there is a possible deadlock.
428 */
429 if (!lock_try_write(&vm->vm_map.lock)) {
430 vm_map_deallocate(&vm->vm_map);
431 vmspace_free(vm);
432 continue;
433 }
434 vm_map_unlock(&vm->vm_map);
435 /*
436 * If the process has been asleep for awhile and had
437 * most of its pages taken away already, swap it out.
438 */
439 swapout(p);
440 vm_map_deallocate(&vm->vm_map);
441 vmspace_free(vm);
442 didswap++;
443 goto retry;
444 }
445 }
446 /*
447 * If we swapped something out, and another process needed memory,
448 * then wakeup the sched process.
449 */
450 if (didswap)
451 wakeup(&proc0);
452 }
453
454 static void
455 swapout(p)
456 register struct proc *p;
457 {
458 pmap_t pmap = &p->p_vmspace->vm_pmap;
459 int i;
460
461 #if defined(SWAP_DEBUG)
462 printf("swapping out %d\n", p->p_pid);
463 #endif
464 ++p->p_stats->p_ru.ru_nswap;
465 /*
466 * remember the process resident count
467 */
468 p->p_vmspace->vm_swrss =
469 p->p_vmspace->vm_pmap.pm_stats.resident_count;
470
471 (void) splhigh();
472 p->p_flag &= ~P_INMEM;
473 p->p_flag |= P_SWAPPING;
474 if (p->p_stat == SRUN)
475 remrq(p);
476 (void) spl0();
477
478 pmap_swapout_proc(p);
479
480 p->p_flag &= ~P_SWAPPING;
481 p->p_swtime = 0;
482 }
483 #endif /* !NO_SWAPPING */
Cache object: 71373195338bf1f13cd34c0f51538297
|