sys/uvm/uvm_map.c
1 /* $OpenBSD: uvm_map.c,v 1.309 2023/01/31 15:18:55 deraadt Exp $ */
2 /* $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */
3
4 /*
5 * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 *
19 *
20 * Copyright (c) 1997 Charles D. Cranor and Washington University.
21 * Copyright (c) 1991, 1993, The Regents of the University of California.
22 *
23 * All rights reserved.
24 *
25 * This code is derived from software contributed to Berkeley by
26 * The Mach Operating System project at Carnegie-Mellon University.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 * 1. Redistributions of source code must retain the above copyright
32 * notice, this list of conditions and the following disclaimer.
33 * 2. Redistributions in binary form must reproduce the above copyright
34 * notice, this list of conditions and the following disclaimer in the
35 * documentation and/or other materials provided with the distribution.
36 * 3. Neither the name of the University nor the names of its contributors
37 * may be used to endorse or promote products derived from this software
38 * without specific prior written permission.
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 * SUCH DAMAGE.
51 *
52 * @(#)vm_map.c 8.3 (Berkeley) 1/12/94
53 * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
54 *
55 *
56 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
57 * All rights reserved.
58 *
59 * Permission to use, copy, modify and distribute this software and
60 * its documentation is hereby granted, provided that both the copyright
61 * notice and this permission notice appear in all copies of the
62 * software, derivative works or modified versions, and any portions
63 * thereof, and that both notices appear in supporting documentation.
64 *
65 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
66 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
67 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
68 *
69 * Carnegie Mellon requests users of this software to return to
70 *
71 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
72 * School of Computer Science
73 * Carnegie Mellon University
74 * Pittsburgh PA 15213-3890
75 *
76 * any improvements or extensions that they make and grant Carnegie the
77 * rights to redistribute these changes.
78 */
79
80 /*
81 * uvm_map.c: uvm map operations
82 */
83
84 /* #define DEBUG */
85 /* #define VMMAP_DEBUG */
86
87 #include <sys/param.h>
88 #include <sys/systm.h>
89 #include <sys/acct.h>
90 #include <sys/mman.h>
91 #include <sys/proc.h>
92 #include <sys/malloc.h>
93 #include <sys/pool.h>
94 #include <sys/sysctl.h>
95 #include <sys/signalvar.h>
96 #include <sys/syslog.h>
97 #include <sys/user.h>
98 #include <sys/tracepoint.h>
99
100 #ifdef SYSVSHM
101 #include <sys/shm.h>
102 #endif
103
104 #include <uvm/uvm.h>
105
106 #ifdef DDB
107 #include <uvm/uvm_ddb.h>
108 #endif
109
110 #include <uvm/uvm_addr.h>
111
112
113 vsize_t uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t);
114 int uvm_mapent_isjoinable(struct vm_map*,
115 struct vm_map_entry*, struct vm_map_entry*);
116 struct vm_map_entry *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*,
117 struct vm_map_entry*, struct uvm_map_deadq*);
118 struct vm_map_entry *uvm_mapent_tryjoin(struct vm_map*,
119 struct vm_map_entry*, struct uvm_map_deadq*);
120 struct vm_map_entry *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*,
121 struct vm_map_entry*, vaddr_t, vsize_t, int,
122 struct uvm_map_deadq*, struct vm_map_entry*);
123 struct vm_map_entry *uvm_mapent_alloc(struct vm_map*, int);
124 void uvm_mapent_free(struct vm_map_entry*);
125 void uvm_unmap_kill_entry(struct vm_map*,
126 struct vm_map_entry*);
127 void uvm_unmap_kill_entry_withlock(struct vm_map *,
128 struct vm_map_entry *, int);
129 void uvm_unmap_detach_intrsafe(struct uvm_map_deadq *);
130 void uvm_mapent_mkfree(struct vm_map*,
131 struct vm_map_entry*, struct vm_map_entry**,
132 struct uvm_map_deadq*, boolean_t);
133 void uvm_map_pageable_pgon(struct vm_map*,
134 struct vm_map_entry*, struct vm_map_entry*,
135 vaddr_t, vaddr_t);
136 int uvm_map_pageable_wire(struct vm_map*,
137 struct vm_map_entry*, struct vm_map_entry*,
138 vaddr_t, vaddr_t, int);
139 void uvm_map_setup_entries(struct vm_map*);
140 void uvm_map_setup_md(struct vm_map*);
141 void uvm_map_teardown(struct vm_map*);
142 void uvm_map_vmspace_update(struct vm_map*,
143 struct uvm_map_deadq*, int);
144 void uvm_map_kmem_grow(struct vm_map*,
145 struct uvm_map_deadq*, vsize_t, int);
146 void uvm_map_freelist_update_clear(struct vm_map*,
147 struct uvm_map_deadq*);
148 void uvm_map_freelist_update_refill(struct vm_map *, int);
149 void uvm_map_freelist_update(struct vm_map*,
150 struct uvm_map_deadq*, vaddr_t, vaddr_t,
151 vaddr_t, vaddr_t, int);
152 struct vm_map_entry *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*,
153 vaddr_t, vaddr_t, int);
154 int uvm_map_findspace(struct vm_map*,
155 struct vm_map_entry**, struct vm_map_entry**,
156 vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
157 vaddr_t);
158 vsize_t uvm_map_addr_augment_get(struct vm_map_entry*);
159 void uvm_map_addr_augment(struct vm_map_entry*);
160
161 int uvm_map_inentry_recheck(u_long, vaddr_t,
162 struct p_inentry *);
163 boolean_t uvm_map_inentry_fix(struct proc *, struct p_inentry *,
164 vaddr_t, int (*)(vm_map_entry_t), u_long);
165 /*
166 * Tree management functions.
167 */
168
169 static inline void uvm_mapent_copy(struct vm_map_entry*,
170 struct vm_map_entry*);
171 static inline int uvm_mapentry_addrcmp(const struct vm_map_entry*,
172 const struct vm_map_entry*);
173 void uvm_mapent_free_insert(struct vm_map*,
174 struct uvm_addr_state*, struct vm_map_entry*);
175 void uvm_mapent_free_remove(struct vm_map*,
176 struct uvm_addr_state*, struct vm_map_entry*);
177 void uvm_mapent_addr_insert(struct vm_map*,
178 struct vm_map_entry*);
179 void uvm_mapent_addr_remove(struct vm_map*,
180 struct vm_map_entry*);
181 void uvm_map_splitentry(struct vm_map*,
182 struct vm_map_entry*, struct vm_map_entry*,
183 vaddr_t);
184 vsize_t uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);
185
186 /*
187 * uvm_vmspace_fork helper functions.
188 */
189 struct vm_map_entry *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
190 vsize_t, vm_prot_t, vm_prot_t,
191 struct vm_map_entry*, struct uvm_map_deadq*, int,
192 int);
193 struct vm_map_entry *uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t,
194 vsize_t, vm_prot_t, vm_prot_t, struct vm_map*,
195 struct vm_map_entry*, struct uvm_map_deadq*);
196 struct vm_map_entry *uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
197 struct vm_map*, struct vm_map_entry*,
198 struct uvm_map_deadq*);
199 struct vm_map_entry *uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
200 struct vm_map*, struct vm_map_entry*,
201 struct uvm_map_deadq*);
202 struct vm_map_entry *uvm_mapent_forkzero(struct vmspace*, struct vm_map*,
203 struct vm_map*, struct vm_map_entry*,
204 struct uvm_map_deadq*);
205
206 /*
207 * Tree validation.
208 */
209 #ifdef VMMAP_DEBUG
210 void uvm_tree_assert(struct vm_map*, int, char*,
211 char*, int);
212 #define UVM_ASSERT(map, cond, file, line) \
213 uvm_tree_assert((map), (cond), #cond, (file), (line))
214 void uvm_tree_sanity(struct vm_map*, char*, int);
215 void uvm_tree_size_chk(struct vm_map*, char*, int);
216 void vmspace_validate(struct vm_map*);
217 #else
218 #define uvm_tree_sanity(_map, _file, _line) do {} while (0)
219 #define uvm_tree_size_chk(_map, _file, _line) do {} while (0)
220 #define vmspace_validate(_map) do {} while (0)
221 #endif
222
223 /*
224 * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
225 * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
226 *
  227  * We attempt to grow by VM_MAP_KSIZE_ALLOCMUL times the allocation size
228 * each time.
229 */
230 #define VM_MAP_KSIZE_INIT (512 * (vaddr_t)PAGE_SIZE)
231 #define VM_MAP_KSIZE_DELTA (256 * (vaddr_t)PAGE_SIZE)
232 #define VM_MAP_KSIZE_ALLOCMUL 4
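/*
 * Editor's note (illustrative, not part of the original file): with 4 KiB
 * pages the values above work out to VM_MAP_KSIZE_INIT = 2 MiB and
 * VM_MAP_KSIZE_DELTA = 1 MiB.  Reading the comment above, a hypothetical
 * 64 KiB allocation that no longer fits would request roughly
 * MAX(VM_MAP_KSIZE_ALLOCMUL * 64 KiB, VM_MAP_KSIZE_DELTA) = 1 MiB of
 * growth; the exact policy lives in uvm_map_kmem_grow().
 */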
233
234 /* auto-allocate address lower bound */
235 #define VMMAP_MIN_ADDR PAGE_SIZE
236
237
238 #ifdef DEADBEEF0
239 #define UVMMAP_DEADBEEF ((unsigned long)DEADBEEF0)
240 #else
241 #define UVMMAP_DEADBEEF ((unsigned long)0xdeadd0d0)
242 #endif
243
244 #ifdef DEBUG
245 int uvm_map_printlocks = 0;
246
247 #define LPRINTF(_args) \
248 do { \
249 if (uvm_map_printlocks) \
250 printf _args; \
251 } while (0)
252 #else
253 #define LPRINTF(_args) do {} while (0)
254 #endif
255
256 static struct mutex uvm_kmapent_mtx;
257 static struct timeval uvm_kmapent_last_warn_time;
258 static struct timeval uvm_kmapent_warn_rate = { 10, 0 };
259
260 const char vmmapbsy[] = "vmmapbsy";
261
262 /*
263 * pool for vmspace structures.
264 */
265 struct pool uvm_vmspace_pool;
266
267 /*
268 * pool for dynamically-allocated map entries.
269 */
270 struct pool uvm_map_entry_pool;
271 struct pool uvm_map_entry_kmem_pool;
272
273 /*
274 * This global represents the end of the kernel virtual address
275 * space. If we want to exceed this, we must grow the kernel
276 * virtual address space dynamically.
277 *
278 * Note, this variable is locked by kernel_map's lock.
279 */
280 vaddr_t uvm_maxkaddr;
281
282 /*
283 * Locking predicate.
284 */
285 #define UVM_MAP_REQ_WRITE(_map) \
286 do { \
287 if ((_map)->ref_count > 0) { \
288 if (((_map)->flags & VM_MAP_INTRSAFE) == 0) \
289 rw_assert_wrlock(&(_map)->lock); \
290 else \
291 MUTEX_ASSERT_LOCKED(&(_map)->mtx); \
292 } \
293 } while (0)
294
295 #define vm_map_modflags(map, set, clear) \
296 do { \
297 mtx_enter(&(map)->flags_lock); \
298 (map)->flags = ((map)->flags | (set)) & ~(clear); \
299 mtx_leave(&(map)->flags_lock); \
300 } while (0)
301
302
303 /*
304 * Tree describing entries by address.
305 *
306 * Addresses are unique.
307 * Entries with start == end may only exist if they are the first entry
308 * (sorted by address) within a free-memory tree.
309 */
310
311 static inline int
312 uvm_mapentry_addrcmp(const struct vm_map_entry *e1,
313 const struct vm_map_entry *e2)
314 {
315 return e1->start < e2->start ? -1 : e1->start > e2->start;
316 }
317
318 /*
319 * Copy mapentry.
320 */
321 static inline void
322 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
323 {
324 caddr_t csrc, cdst;
325 size_t sz;
326
327 csrc = (caddr_t)src;
328 cdst = (caddr_t)dst;
329 csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
330 cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
331
332 sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) -
333 offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
334 memcpy(cdst, csrc, sz);
335 }
336
337 /*
338 * Handle free-list insertion.
339 */
340 void
341 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr,
342 struct vm_map_entry *entry)
343 {
344 const struct uvm_addr_functions *fun;
345 #ifdef VMMAP_DEBUG
346 vaddr_t min, max, bound;
347 #endif
348
349 #ifdef VMMAP_DEBUG
350 /*
351 * Boundary check.
352 * Boundaries are folded if they go on the same free list.
353 */
354 min = VMMAP_FREE_START(entry);
355 max = VMMAP_FREE_END(entry);
356
357 while (min < max) {
358 bound = uvm_map_boundary(map, min, max);
359 KASSERT(uvm_map_uaddr(map, min) == uaddr);
360 min = bound;
361 }
362 #endif
363 KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0);
364 KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0);
365
366 UVM_MAP_REQ_WRITE(map);
367
368 /* Actual insert: forward to uaddr pointer. */
369 if (uaddr != NULL) {
370 fun = uaddr->uaddr_functions;
371 KDASSERT(fun != NULL);
372 if (fun->uaddr_free_insert != NULL)
373 (*fun->uaddr_free_insert)(map, uaddr, entry);
374 entry->etype |= UVM_ET_FREEMAPPED;
375 }
376
377 /* Update fspace augmentation. */
378 uvm_map_addr_augment(entry);
379 }
380
381 /*
382 * Handle free-list removal.
383 */
384 void
385 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr,
386 struct vm_map_entry *entry)
387 {
388 const struct uvm_addr_functions *fun;
389
390 KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL);
391 KASSERT(uvm_map_uaddr_e(map, entry) == uaddr);
392 UVM_MAP_REQ_WRITE(map);
393
394 if (uaddr != NULL) {
395 fun = uaddr->uaddr_functions;
396 if (fun->uaddr_free_remove != NULL)
397 (*fun->uaddr_free_remove)(map, uaddr, entry);
398 entry->etype &= ~UVM_ET_FREEMAPPED;
399 }
400 }
401
402 /*
403 * Handle address tree insertion.
404 */
405 void
406 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry)
407 {
408 struct vm_map_entry *res;
409
410 if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF))
411 panic("uvm_mapent_addr_insert: entry still in addr list");
412 KDASSERT(entry->start <= entry->end);
413 KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 &&
414 (entry->end & (vaddr_t)PAGE_MASK) == 0);
415
416 TRACEPOINT(uvm, map_insert,
417 entry->start, entry->end, entry->protection, NULL);
418
419 UVM_MAP_REQ_WRITE(map);
420 res = RBT_INSERT(uvm_map_addr, &map->addr, entry);
421 if (res != NULL) {
422 panic("uvm_mapent_addr_insert: map %p entry %p "
423 "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision "
424 "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)",
425 map, entry,
426 entry->start, entry->end, entry->guard, entry->fspace,
427 res, res->start, res->end, res->guard, res->fspace);
428 }
429 }
430
431 /*
432 * Handle address tree removal.
433 */
434 void
435 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
436 {
437 struct vm_map_entry *res;
438
439 TRACEPOINT(uvm, map_remove,
440 entry->start, entry->end, entry->protection, NULL);
441
442 UVM_MAP_REQ_WRITE(map);
443 res = RBT_REMOVE(uvm_map_addr, &map->addr, entry);
444 if (res != entry)
445 panic("uvm_mapent_addr_remove");
446 RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF);
447 }
448
449 /*
450 * uvm_map_reference: add reference to a map
451 *
452 * => map need not be locked
453 */
454 void
455 uvm_map_reference(struct vm_map *map)
456 {
457 atomic_inc_int(&map->ref_count);
458 }
459
460 void
461 uvm_map_lock_entry(struct vm_map_entry *entry)
462 {
463 if (entry->aref.ar_amap != NULL) {
464 amap_lock(entry->aref.ar_amap);
465 }
466 if (UVM_ET_ISOBJ(entry)) {
467 rw_enter(entry->object.uvm_obj->vmobjlock, RW_WRITE);
468 }
469 }
470
471 void
472 uvm_map_unlock_entry(struct vm_map_entry *entry)
473 {
474 if (UVM_ET_ISOBJ(entry)) {
475 rw_exit(entry->object.uvm_obj->vmobjlock);
476 }
477 if (entry->aref.ar_amap != NULL) {
478 amap_unlock(entry->aref.ar_amap);
479 }
480 }
481
482 /*
483 * Calculate the dused delta.
484 */
485 vsize_t
486 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max)
487 {
488 struct vmspace *vm;
489 vsize_t sz;
490 vaddr_t lmax;
491 vaddr_t stack_begin, stack_end; /* Position of stack. */
492
493 KASSERT(map->flags & VM_MAP_ISVMSPACE);
494 vm_map_assert_anylock(map);
495
496 vm = (struct vmspace *)map;
497 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
498 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
499
500 sz = 0;
501 while (min != max) {
502 lmax = max;
503 if (min < stack_begin && lmax > stack_begin)
504 lmax = stack_begin;
505 else if (min < stack_end && lmax > stack_end)
506 lmax = stack_end;
507
508 if (min >= stack_begin && min < stack_end) {
509 /* nothing */
510 } else
511 sz += lmax - min;
512 min = lmax;
513 }
514
515 return sz >> PAGE_SHIFT;
516 }
517
518 /*
519 * Find the entry describing the given address.
520 */
521 struct vm_map_entry*
522 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr)
523 {
524 struct vm_map_entry *iter;
525
526 iter = RBT_ROOT(uvm_map_addr, atree);
527 while (iter != NULL) {
528 if (iter->start > addr)
529 iter = RBT_LEFT(uvm_map_addr, iter);
530 else if (VMMAP_FREE_END(iter) <= addr)
531 iter = RBT_RIGHT(uvm_map_addr, iter);
532 else
533 return iter;
534 }
535 return NULL;
536 }
537
538 /*
539 * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry)
540 *
541 * Push dead entries into a linked list.
542 * Since the linked list abuses the address tree for storage, the entry
543 * may not be linked in a map.
544 *
  545  * The deadq must be initialized with TAILQ_INIT() before the first call
  546  * to this macro.  uvm_unmap_detach(deadq, 0) will release the dead entries.
547 */
548 static inline void
549 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry)
550 {
551 TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq);
552 }
553 #define DEAD_ENTRY_PUSH(_headptr, _entry) \
554 dead_entry_push((_headptr), (_entry))
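/*
 * Editor's note: a minimal usage sketch of the dead-entry queue (not part
 * of the original file), mirroring the pattern used by uvm_mapanon() and
 * uvm_unmap() elsewhere in this file.  The example assumes the entry has
 * already been unlinked from the map's address and free trees; the helper
 * name is hypothetical.
 */
#if 0	/* illustrative only */
static void
example_discard_entry(struct vm_map_entry *entry)
{
	struct uvm_map_deadq dead;

	TAILQ_INIT(&dead);			/* initialize the queue head */
	DEAD_ENTRY_PUSH(&dead, entry);		/* queue the detached entry */
	uvm_unmap_detach(&dead, 0);		/* drop refs and free it */
}
#endif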
555
556 /*
557 * Test if memory starting at addr with sz bytes is free.
558 *
559 * Fills in *start_ptr and *end_ptr to be the first and last entry describing
560 * the space.
  561  * If called with prefilled *start_ptr and *end_ptr, they must already be correct.
562 */
563 int
564 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
565 struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
566 vaddr_t addr, vsize_t sz)
567 {
568 struct uvm_addr_state *free;
569 struct uvm_map_addr *atree;
570 struct vm_map_entry *i, *i_end;
571
572 if (addr + sz < addr)
573 return 0;
574
575 vm_map_assert_anylock(map);
576
577 /*
578 * Kernel memory above uvm_maxkaddr is considered unavailable.
579 */
580 if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
581 if (addr + sz > uvm_maxkaddr)
582 return 0;
583 }
584
585 atree = &map->addr;
586
587 /*
588 * Fill in first, last, so they point at the entries containing the
589 * first and last address of the range.
590 * Note that if they are not NULL, we don't perform the lookup.
591 */
592 KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
593 if (*start_ptr == NULL) {
594 *start_ptr = uvm_map_entrybyaddr(atree, addr);
595 if (*start_ptr == NULL)
596 return 0;
597 } else
598 KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
599 if (*end_ptr == NULL) {
600 if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
601 *end_ptr = *start_ptr;
602 else {
603 *end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
604 if (*end_ptr == NULL)
605 return 0;
606 }
607 } else
608 KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));
609
610 /* Validation. */
611 KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
612 KDASSERT((*start_ptr)->start <= addr &&
613 VMMAP_FREE_END(*start_ptr) > addr &&
614 (*end_ptr)->start < addr + sz &&
615 VMMAP_FREE_END(*end_ptr) >= addr + sz);
616
617 /*
  618 	 * Check that none of the entries intersect with <addr, addr+sz>.
  619 	 * Also, if the entry belongs to uaddr_exe or uaddr_brk_stack, it is
620 * considered unavailable unless called by those allocators.
621 */
622 i = *start_ptr;
623 i_end = RBT_NEXT(uvm_map_addr, *end_ptr);
624 for (; i != i_end;
625 i = RBT_NEXT(uvm_map_addr, i)) {
626 if (i->start != i->end && i->end > addr)
627 return 0;
628
629 /*
630 * uaddr_exe and uaddr_brk_stack may only be used
631 * by these allocators and the NULL uaddr (i.e. no
632 * uaddr).
633 * Reject if this requirement is not met.
634 */
635 if (uaddr != NULL) {
636 free = uvm_map_uaddr_e(map, i);
637
638 if (uaddr != free && free != NULL &&
639 (free == map->uaddr_exe ||
640 free == map->uaddr_brk_stack))
641 return 0;
642 }
643 }
644
645 return -1;
646 }
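/*
 * Editor's note: an illustrative caller of uvm_map_isavail() above (not
 * part of the original file).  With first/last preset to NULL the entry
 * lookups are performed inside; the helper name is hypothetical.
 */
#if 0	/* illustrative only */
static int
example_range_is_free(struct vm_map *map, vaddr_t addr, vsize_t sz)
{
	struct vm_map_entry *first = NULL, *last = NULL;
	int avail;

	vm_map_lock_read(map);
	avail = uvm_map_isavail(map, NULL, &first, &last, addr, sz);
	vm_map_unlock_read(map);

	return avail;	/* non-zero when <addr, addr+sz> is unallocated */
}
#endif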
647
648 /*
649 * Invoke each address selector until an address is found.
650 * Will not invoke uaddr_exe.
651 */
652 int
653 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first,
654 struct vm_map_entry**last, vaddr_t *addr, vsize_t sz,
655 vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint)
656 {
657 struct uvm_addr_state *uaddr;
658 int i;
659
660 /*
661 * Allocation for sz bytes at any address,
662 * using the addr selectors in order.
663 */
664 for (i = 0; i < nitems(map->uaddr_any); i++) {
665 uaddr = map->uaddr_any[i];
666
667 if (uvm_addr_invoke(map, uaddr, first, last,
668 addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
669 return 0;
670 }
671
672 /* Fall back to brk() and stack() address selectors. */
673 uaddr = map->uaddr_brk_stack;
674 if (uvm_addr_invoke(map, uaddr, first, last,
675 addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
676 return 0;
677
678 return ENOMEM;
679 }
680
681 /* Calculate entry augmentation value. */
682 vsize_t
683 uvm_map_addr_augment_get(struct vm_map_entry *entry)
684 {
685 vsize_t augment;
686 struct vm_map_entry *left, *right;
687
688 augment = entry->fspace;
689 if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL)
690 augment = MAX(augment, left->fspace_augment);
691 if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
692 augment = MAX(augment, right->fspace_augment);
693 return augment;
694 }
695
696 /*
697 * Update augmentation data in entry.
698 */
699 void
700 uvm_map_addr_augment(struct vm_map_entry *entry)
701 {
702 vsize_t augment;
703
704 while (entry != NULL) {
705 /* Calculate value for augmentation. */
706 augment = uvm_map_addr_augment_get(entry);
707
708 /*
709 * Descend update.
710 * Once we find an entry that already has the correct value,
711 * stop, since it means all its parents will use the correct
712 * value too.
713 */
714 if (entry->fspace_augment == augment)
715 return;
716 entry->fspace_augment = augment;
717 entry = RBT_PARENT(uvm_map_addr, entry);
718 }
719 }
720
721 /*
722 * uvm_mapanon: establish a valid mapping in map for an anon
723 *
724 * => *addr and sz must be a multiple of PAGE_SIZE.
725 * => *addr is ignored, except if flags contains UVM_FLAG_FIXED.
726 * => map must be unlocked.
727 *
728 * => align: align vaddr, must be a power-of-2.
729 * Align is only a hint and will be ignored if the alignment fails.
730 */
731 int
732 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz,
733 vsize_t align, unsigned int flags)
734 {
735 struct vm_map_entry *first, *last, *entry, *new;
736 struct uvm_map_deadq dead;
737 vm_prot_t prot;
738 vm_prot_t maxprot;
739 vm_inherit_t inherit;
740 int advice;
741 int error;
742 vaddr_t pmap_align, pmap_offset;
743 vaddr_t hint;
744
745 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE);
746 KASSERT(map != kernel_map);
747 KASSERT((map->flags & UVM_FLAG_HOLE) == 0);
748 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
749 splassert(IPL_NONE);
750 KASSERT((flags & UVM_FLAG_TRYLOCK) == 0);
751
752 /*
753 * We use pmap_align and pmap_offset as alignment and offset variables.
754 *
755 * Because the align parameter takes precedence over pmap prefer,
756 * the pmap_align will need to be set to align, with pmap_offset = 0,
757 * if pmap_prefer will not align.
758 */
759 pmap_align = MAX(align, PAGE_SIZE);
760 pmap_offset = 0;
761
762 /* Decode parameters. */
763 prot = UVM_PROTECTION(flags);
764 maxprot = UVM_MAXPROTECTION(flags);
765 advice = UVM_ADVICE(flags);
766 inherit = UVM_INHERIT(flags);
767 error = 0;
768 hint = trunc_page(*addr);
769 TAILQ_INIT(&dead);
770 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
771 KASSERT((align & (align - 1)) == 0);
772
773 /* Check protection. */
774 if ((prot & maxprot) != prot)
775 return EACCES;
776
777 /*
778 * Before grabbing the lock, allocate a map entry for later
779 * use to ensure we don't wait for memory while holding the
780 * vm_map_lock.
781 */
782 new = uvm_mapent_alloc(map, flags);
783 if (new == NULL)
784 return ENOMEM;
785
786 vm_map_lock(map);
787 first = last = NULL;
788 if (flags & UVM_FLAG_FIXED) {
789 /*
790 * Fixed location.
791 *
792 * Note: we ignore align, pmap_prefer.
793 * Fill in first, last and *addr.
794 */
795 KASSERT((*addr & PAGE_MASK) == 0);
796
797 /* Check that the space is available. */
798 if (flags & UVM_FLAG_UNMAP) {
799 if ((flags & UVM_FLAG_STACK) &&
800 !uvm_map_is_stack_remappable(map, *addr, sz,
801 (flags & UVM_FLAG_SIGALTSTACK))) {
802 error = EINVAL;
803 goto unlock;
804 }
805 if (uvm_unmap_remove(map, *addr, *addr + sz, &dead,
806 FALSE, TRUE,
807 (flags & UVM_FLAG_SIGALTSTACK) ? FALSE : TRUE) != 0) {
808 error = EPERM; /* immutable entries found */
809 goto unlock;
810 }
811 }
812 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
813 error = ENOMEM;
814 goto unlock;
815 }
816 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
817 (align == 0 || (*addr & (align - 1)) == 0) &&
818 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
819 /*
820 * Address used as hint.
821 *
822 * Note: we enforce the alignment restriction,
823 * but ignore pmap_prefer.
824 */
825 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
826 /* Run selection algorithm for executables. */
827 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
828 addr, sz, pmap_align, pmap_offset, prot, hint);
829
830 if (error != 0)
831 goto unlock;
832 } else {
833 /* Update freelists from vmspace. */
834 uvm_map_vmspace_update(map, &dead, flags);
835
836 error = uvm_map_findspace(map, &first, &last, addr, sz,
837 pmap_align, pmap_offset, prot, hint);
838
839 if (error != 0)
840 goto unlock;
841 }
842
  843 	/* Double-check that the selected address doesn't overflow. */
844 if (*addr + sz < *addr) {
845 error = ENOMEM;
846 goto unlock;
847 }
848
849 /* If we only want a query, return now. */
850 if (flags & UVM_FLAG_QUERY) {
851 error = 0;
852 goto unlock;
853 }
854
855 /*
856 * Create new entry.
857 * first and last may be invalidated after this call.
858 */
859 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
860 new);
861 if (entry == NULL) {
862 error = ENOMEM;
863 goto unlock;
864 }
865 new = NULL;
866 KDASSERT(entry->start == *addr && entry->end == *addr + sz);
867 entry->object.uvm_obj = NULL;
868 entry->offset = 0;
869 entry->protection = prot;
870 entry->max_protection = maxprot;
871 entry->inheritance = inherit;
872 entry->wired_count = 0;
873 entry->advice = advice;
874 if (prot & PROT_WRITE)
875 map->wserial++;
876 if (flags & UVM_FLAG_SYSCALL) {
877 entry->etype |= UVM_ET_SYSCALL;
878 map->wserial++;
879 }
880 if (flags & UVM_FLAG_STACK) {
881 entry->etype |= UVM_ET_STACK;
882 if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP))
883 map->sserial++;
884 }
885 if (flags & UVM_FLAG_COPYONW) {
886 entry->etype |= UVM_ET_COPYONWRITE;
887 if ((flags & UVM_FLAG_OVERLAY) == 0)
888 entry->etype |= UVM_ET_NEEDSCOPY;
889 }
890 if (flags & UVM_FLAG_CONCEAL)
891 entry->etype |= UVM_ET_CONCEAL;
892 if (flags & UVM_FLAG_OVERLAY) {
893 entry->aref.ar_pageoff = 0;
894 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
895 }
896
897 /* Update map and process statistics. */
898 map->size += sz;
899 if (prot != PROT_NONE) {
900 ((struct vmspace *)map)->vm_dused +=
901 uvmspace_dused(map, *addr, *addr + sz);
902 }
903
904 unlock:
905 vm_map_unlock(map);
906
907 /*
908 * Remove dead entries.
909 *
910 * Dead entries may be the result of merging.
911 * uvm_map_mkentry may also create dead entries, when it attempts to
912 * destroy free-space entries.
913 */
914 uvm_unmap_detach(&dead, 0);
915
916 if (new)
917 uvm_mapent_free(new);
918 return error;
919 }
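/*
 * Editor's note: a minimal, illustrative caller of uvm_mapanon() above (not
 * part of the original file), modeled on uvm_map_remap_as_stack() further
 * down.  The flag combination and helper name are examples only.
 */
#if 0	/* illustrative only */
static int
example_map_anon_rw(struct proc *p, vaddr_t *hint, vsize_t sz)
{
	struct vm_map *map = &p->p_vmspace->vm_map;
	int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
	    PROT_READ | PROT_WRITE, MAP_INHERIT_COPY, MADV_NORMAL,
	    UVM_FLAG_COPYONW);

	/* *hint is only a suggestion; on success it holds the chosen VA. */
	return uvm_mapanon(map, hint, sz, 0, flags);
}
#endif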
920
921 /*
922 * uvm_map: establish a valid mapping in map
923 *
924 * => *addr and sz must be a multiple of PAGE_SIZE.
925 * => map must be unlocked.
926 * => <uobj,uoffset> value meanings (4 cases):
927 * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER
928 * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER
929 * [3] <uobj,uoffset> == normal mapping
930 * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA
931 *
932 * case [4] is for kernel mappings where we don't know the offset until
933 * we've found a virtual address. note that kernel object offsets are
934 * always relative to vm_map_min(kernel_map).
935 *
936 * => align: align vaddr, must be a power-of-2.
937 * Align is only a hint and will be ignored if the alignment fails.
938 */
939 int
940 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz,
941 struct uvm_object *uobj, voff_t uoffset,
942 vsize_t align, unsigned int flags)
943 {
944 struct vm_map_entry *first, *last, *entry, *new;
945 struct uvm_map_deadq dead;
946 vm_prot_t prot;
947 vm_prot_t maxprot;
948 vm_inherit_t inherit;
949 int advice;
950 int error;
951 vaddr_t pmap_align, pmap_offset;
952 vaddr_t hint;
953
954 if ((map->flags & VM_MAP_INTRSAFE) == 0)
955 splassert(IPL_NONE);
956 else
957 splassert(IPL_VM);
958
959 /*
960 * We use pmap_align and pmap_offset as alignment and offset variables.
961 *
962 * Because the align parameter takes precedence over pmap prefer,
963 * the pmap_align will need to be set to align, with pmap_offset = 0,
964 * if pmap_prefer will not align.
965 */
966 if (uoffset == UVM_UNKNOWN_OFFSET) {
967 pmap_align = MAX(align, PAGE_SIZE);
968 pmap_offset = 0;
969 } else {
970 pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE);
971 pmap_offset = PMAP_PREFER_OFFSET(uoffset);
972
973 if (align == 0 ||
974 (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) {
975 /* pmap_offset satisfies align, no change. */
976 } else {
977 /* Align takes precedence over pmap prefer. */
978 pmap_align = align;
979 pmap_offset = 0;
980 }
981 }
982
983 /* Decode parameters. */
984 prot = UVM_PROTECTION(flags);
985 maxprot = UVM_MAXPROTECTION(flags);
986 advice = UVM_ADVICE(flags);
987 inherit = UVM_INHERIT(flags);
988 error = 0;
989 hint = trunc_page(*addr);
990 TAILQ_INIT(&dead);
991 KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
992 KASSERT((align & (align - 1)) == 0);
993
994 /* Holes are incompatible with other types of mappings. */
995 if (flags & UVM_FLAG_HOLE) {
996 KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) &&
997 (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0);
998 }
999
1000 /* Unset hint for kernel_map non-fixed allocations. */
1001 if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED))
1002 hint = 0;
1003
1004 /* Check protection. */
1005 if ((prot & maxprot) != prot)
1006 return EACCES;
1007
1008 if (map == kernel_map &&
1009 (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
1010 panic("uvm_map: kernel map W^X violation requested");
1011
1012 /*
1013 * Before grabbing the lock, allocate a map entry for later
1014 * use to ensure we don't wait for memory while holding the
1015 * vm_map_lock.
1016 */
1017 new = uvm_mapent_alloc(map, flags);
1018 if (new == NULL)
1019 return ENOMEM;
1020
1021 if (flags & UVM_FLAG_TRYLOCK) {
1022 if (vm_map_lock_try(map) == FALSE) {
1023 error = EFAULT;
1024 goto out;
1025 }
1026 } else {
1027 vm_map_lock(map);
1028 }
1029
1030 first = last = NULL;
1031 if (flags & UVM_FLAG_FIXED) {
1032 /*
1033 * Fixed location.
1034 *
1035 * Note: we ignore align, pmap_prefer.
1036 * Fill in first, last and *addr.
1037 */
1038 KASSERT((*addr & PAGE_MASK) == 0);
1039
1040 /*
1041 * Grow pmap to include allocated address.
1042 * If the growth fails, the allocation will fail too.
1043 */
1044 if ((map->flags & VM_MAP_ISVMSPACE) == 0 &&
1045 uvm_maxkaddr < (*addr + sz)) {
1046 uvm_map_kmem_grow(map, &dead,
1047 *addr + sz - uvm_maxkaddr, flags);
1048 }
1049
1050 /* Check that the space is available. */
1051 if (flags & UVM_FLAG_UNMAP) {
1052 if (uvm_unmap_remove(map, *addr, *addr + sz, &dead,
1053 FALSE, TRUE, TRUE) != 0) {
1054 error = EPERM; /* immutable entries found */
1055 goto unlock;
1056 }
1057 }
1058 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1059 error = ENOMEM;
1060 goto unlock;
1061 }
1062 } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
1063 (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE &&
1064 (align == 0 || (*addr & (align - 1)) == 0) &&
1065 uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
1066 /*
1067 * Address used as hint.
1068 *
1069 * Note: we enforce the alignment restriction,
1070 * but ignore pmap_prefer.
1071 */
1072 } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
1073 /* Run selection algorithm for executables. */
1074 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
1075 addr, sz, pmap_align, pmap_offset, prot, hint);
1076
1077 /* Grow kernel memory and try again. */
1078 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1079 uvm_map_kmem_grow(map, &dead, sz, flags);
1080
1081 error = uvm_addr_invoke(map, map->uaddr_exe,
1082 &first, &last, addr, sz,
1083 pmap_align, pmap_offset, prot, hint);
1084 }
1085
1086 if (error != 0)
1087 goto unlock;
1088 } else {
1089 /* Update freelists from vmspace. */
1090 if (map->flags & VM_MAP_ISVMSPACE)
1091 uvm_map_vmspace_update(map, &dead, flags);
1092
1093 error = uvm_map_findspace(map, &first, &last, addr, sz,
1094 pmap_align, pmap_offset, prot, hint);
1095
1096 /* Grow kernel memory and try again. */
1097 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
1098 uvm_map_kmem_grow(map, &dead, sz, flags);
1099
1100 error = uvm_map_findspace(map, &first, &last, addr, sz,
1101 pmap_align, pmap_offset, prot, hint);
1102 }
1103
1104 if (error != 0)
1105 goto unlock;
1106 }
1107
 1108 	/* Double-check that the selected address doesn't overflow. */
1109 if (*addr + sz < *addr) {
1110 error = ENOMEM;
1111 goto unlock;
1112 }
1113
1114 KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE ||
1115 uvm_maxkaddr >= *addr + sz);
1116
1117 /* If we only want a query, return now. */
1118 if (flags & UVM_FLAG_QUERY) {
1119 error = 0;
1120 goto unlock;
1121 }
1122
1123 if (uobj == NULL)
1124 uoffset = 0;
1125 else if (uoffset == UVM_UNKNOWN_OFFSET) {
1126 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
1127 uoffset = *addr - vm_map_min(kernel_map);
1128 }
1129
1130 /*
1131 * Create new entry.
1132 * first and last may be invalidated after this call.
1133 */
1134 entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
1135 new);
1136 if (entry == NULL) {
1137 error = ENOMEM;
1138 goto unlock;
1139 }
1140 new = NULL;
1141 KDASSERT(entry->start == *addr && entry->end == *addr + sz);
1142 entry->object.uvm_obj = uobj;
1143 entry->offset = uoffset;
1144 entry->protection = prot;
1145 entry->max_protection = maxprot;
1146 entry->inheritance = inherit;
1147 entry->wired_count = 0;
1148 entry->advice = advice;
1149 if (prot & PROT_WRITE)
1150 map->wserial++;
1151 if (flags & UVM_FLAG_SYSCALL) {
1152 entry->etype |= UVM_ET_SYSCALL;
1153 map->wserial++;
1154 }
1155 if (flags & UVM_FLAG_STACK) {
1156 entry->etype |= UVM_ET_STACK;
1157 if (flags & UVM_FLAG_UNMAP)
1158 map->sserial++;
1159 }
1160 if (uobj)
1161 entry->etype |= UVM_ET_OBJ;
1162 else if (flags & UVM_FLAG_HOLE)
1163 entry->etype |= UVM_ET_HOLE;
1164 if (flags & UVM_FLAG_NOFAULT)
1165 entry->etype |= UVM_ET_NOFAULT;
1166 if (flags & UVM_FLAG_WC)
1167 entry->etype |= UVM_ET_WC;
1168 if (flags & UVM_FLAG_COPYONW) {
1169 entry->etype |= UVM_ET_COPYONWRITE;
1170 if ((flags & UVM_FLAG_OVERLAY) == 0)
1171 entry->etype |= UVM_ET_NEEDSCOPY;
1172 }
1173 if (flags & UVM_FLAG_CONCEAL)
1174 entry->etype |= UVM_ET_CONCEAL;
1175 if (flags & UVM_FLAG_OVERLAY) {
1176 entry->aref.ar_pageoff = 0;
1177 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
1178 }
1179
1180 /* Update map and process statistics. */
1181 if (!(flags & UVM_FLAG_HOLE)) {
1182 map->size += sz;
1183 if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL &&
1184 prot != PROT_NONE) {
1185 ((struct vmspace *)map)->vm_dused +=
1186 uvmspace_dused(map, *addr, *addr + sz);
1187 }
1188 }
1189
1190 /*
1191 * Try to merge entry.
1192 *
 1193 	 * Userland allocations are kept separate most of the time.
 1194 	 * Forgo the effort of merging what usually can't be merged
1195 * and only try the merge if it concerns a kernel entry.
1196 */
1197 if ((flags & UVM_FLAG_NOMERGE) == 0 &&
1198 (map->flags & VM_MAP_ISVMSPACE) == 0)
1199 uvm_mapent_tryjoin(map, entry, &dead);
1200
1201 unlock:
1202 vm_map_unlock(map);
1203
1204 /*
1205 * Remove dead entries.
1206 *
1207 * Dead entries may be the result of merging.
1208 * uvm_map_mkentry may also create dead entries, when it attempts to
1209 * destroy free-space entries.
1210 */
1211 if (map->flags & VM_MAP_INTRSAFE)
1212 uvm_unmap_detach_intrsafe(&dead);
1213 else
1214 uvm_unmap_detach(&dead, 0);
1215 out:
1216 if (new)
1217 uvm_mapent_free(new);
1218 return error;
1219 }
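/*
 * Editor's note: an illustrative uvm_map() call (not part of the original
 * file) for case [2] above: anonymous kernel memory, no PMAP_PREFER hint.
 * The helper name and flag choices are examples only.
 */
#if 0	/* illustrative only */
static int
example_kernel_anon_map(vsize_t sz, vaddr_t *vap)
{
	vaddr_t va = 0;
	int error;

	error = uvm_map(kernel_map, &va, round_page(sz), NULL,
	    UVM_UNKNOWN_OFFSET, 0,
	    UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
	    MAP_INHERIT_NONE, MADV_NORMAL, 0));
	if (error == 0)
		*vap = va;	/* va holds the kernel VA chosen by uvm_map */
	return error;
}
#endif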
1220
1221 /*
1222 * True iff e1 and e2 can be joined together.
1223 */
1224 int
1225 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1,
1226 struct vm_map_entry *e2)
1227 {
1228 KDASSERT(e1 != NULL && e2 != NULL);
1229
1230 /* Must be the same entry type and not have free memory between. */
1231 if (e1->etype != e2->etype || e1->end != e2->start)
1232 return 0;
1233
1234 /* Submaps are never joined. */
1235 if (UVM_ET_ISSUBMAP(e1))
1236 return 0;
1237
1238 /* Never merge wired memory. */
1239 if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2))
1240 return 0;
1241
1242 /* Protection, inheritance and advice must be equal. */
1243 if (e1->protection != e2->protection ||
1244 e1->max_protection != e2->max_protection ||
1245 e1->inheritance != e2->inheritance ||
1246 e1->advice != e2->advice)
1247 return 0;
1248
1249 /* If uvm_object: object itself and offsets within object must match. */
1250 if (UVM_ET_ISOBJ(e1)) {
1251 if (e1->object.uvm_obj != e2->object.uvm_obj)
1252 return 0;
1253 if (e1->offset + (e1->end - e1->start) != e2->offset)
1254 return 0;
1255 }
1256
1257 /*
1258 * Cannot join shared amaps.
1259 * Note: no need to lock amap to look at refs, since we don't care
1260 * about its exact value.
1261 * If it is 1 (i.e. we have the only reference) it will stay there.
1262 */
1263 if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1)
1264 return 0;
1265 if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1)
1266 return 0;
1267
1268 /* Apparently, e1 and e2 match. */
1269 return 1;
1270 }
1271
1272 /*
1273 * Join support function.
1274 *
1275 * Returns the merged entry on success.
1276 * Returns NULL if the merge failed.
1277 */
1278 struct vm_map_entry*
1279 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1,
1280 struct vm_map_entry *e2, struct uvm_map_deadq *dead)
1281 {
1282 struct uvm_addr_state *free;
1283
1284 /*
1285 * Merging is not supported for map entries that
1286 * contain an amap in e1. This should never happen
1287 * anyway, because only kernel entries are merged.
1288 * These do not contain amaps.
1289 * e2 contains no real information in its amap,
1290 * so it can be erased immediately.
1291 */
1292 KASSERT(e1->aref.ar_amap == NULL);
1293
1294 /*
1295 * Don't drop obj reference:
1296 * uvm_unmap_detach will do this for us.
1297 */
1298 free = uvm_map_uaddr_e(map, e1);
1299 uvm_mapent_free_remove(map, free, e1);
1300
1301 free = uvm_map_uaddr_e(map, e2);
1302 uvm_mapent_free_remove(map, free, e2);
1303 uvm_mapent_addr_remove(map, e2);
1304 e1->end = e2->end;
1305 e1->guard = e2->guard;
1306 e1->fspace = e2->fspace;
1307 uvm_mapent_free_insert(map, free, e1);
1308
1309 DEAD_ENTRY_PUSH(dead, e2);
1310 return e1;
1311 }
1312
1313 /*
1314 * Attempt forward and backward joining of entry.
1315 *
1316 * Returns entry after joins.
1317 * We are guaranteed that the amap of entry is either non-existent or
1318 * has never been used.
1319 */
1320 struct vm_map_entry*
1321 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry,
1322 struct uvm_map_deadq *dead)
1323 {
1324 struct vm_map_entry *other;
1325 struct vm_map_entry *merged;
1326
1327 /* Merge with previous entry. */
1328 other = RBT_PREV(uvm_map_addr, entry);
1329 if (other && uvm_mapent_isjoinable(map, other, entry)) {
1330 merged = uvm_mapent_merge(map, other, entry, dead);
1331 if (merged)
1332 entry = merged;
1333 }
1334
1335 /*
1336 * Merge with next entry.
1337 *
1338 * Because amap can only extend forward and the next entry
1339 * probably contains sensible info, only perform forward merging
1340 * in the absence of an amap.
1341 */
1342 other = RBT_NEXT(uvm_map_addr, entry);
1343 if (other && entry->aref.ar_amap == NULL &&
1344 other->aref.ar_amap == NULL &&
1345 uvm_mapent_isjoinable(map, entry, other)) {
1346 merged = uvm_mapent_merge(map, entry, other, dead);
1347 if (merged)
1348 entry = merged;
1349 }
1350
1351 return entry;
1352 }
1353
1354 /*
1355 * Kill entries that are no longer in a map.
1356 */
1357 void
1358 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags)
1359 {
1360 struct vm_map_entry *entry, *tmp;
1361 int waitok = flags & UVM_PLA_WAITOK;
1362
1363 TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) {
1364 /* Drop reference to amap, if we've got one. */
1365 if (entry->aref.ar_amap)
1366 amap_unref(entry->aref.ar_amap,
1367 entry->aref.ar_pageoff,
1368 atop(entry->end - entry->start),
1369 flags & AMAP_REFALL);
1370
1371 /* Skip entries for which we have to grab the kernel lock. */
1372 if (UVM_ET_ISSUBMAP(entry) || UVM_ET_ISOBJ(entry))
1373 continue;
1374
1375 TAILQ_REMOVE(deadq, entry, dfree.deadq);
1376 uvm_mapent_free(entry);
1377 }
1378
1379 if (TAILQ_EMPTY(deadq))
1380 return;
1381
1382 KERNEL_LOCK();
1383 while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1384 if (waitok)
1385 uvm_pause();
1386 /* Drop reference to our backing object, if we've got one. */
1387 if (UVM_ET_ISSUBMAP(entry)) {
1388 /* ... unlikely to happen, but play it safe */
1389 uvm_map_deallocate(entry->object.sub_map);
1390 } else if (UVM_ET_ISOBJ(entry) &&
1391 entry->object.uvm_obj->pgops->pgo_detach) {
1392 entry->object.uvm_obj->pgops->pgo_detach(
1393 entry->object.uvm_obj);
1394 }
1395
1396 /* Step to next. */
1397 TAILQ_REMOVE(deadq, entry, dfree.deadq);
1398 uvm_mapent_free(entry);
1399 }
1400 KERNEL_UNLOCK();
1401 }
1402
1403 void
1404 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq)
1405 {
1406 struct vm_map_entry *entry;
1407
1408 while ((entry = TAILQ_FIRST(deadq)) != NULL) {
1409 KASSERT(entry->aref.ar_amap == NULL);
1410 KASSERT(!UVM_ET_ISSUBMAP(entry));
1411 KASSERT(!UVM_ET_ISOBJ(entry));
1412 TAILQ_REMOVE(deadq, entry, dfree.deadq);
1413 uvm_mapent_free(entry);
1414 }
1415 }
1416
1417 /*
1418 * Create and insert new entry.
1419 *
1420 * Returned entry contains new addresses and is inserted properly in the tree.
1421 * first and last are (probably) no longer valid.
1422 */
1423 struct vm_map_entry*
1424 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first,
1425 struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags,
1426 struct uvm_map_deadq *dead, struct vm_map_entry *new)
1427 {
1428 struct vm_map_entry *entry, *prev;
1429 struct uvm_addr_state *free;
1430 vaddr_t min, max; /* free space boundaries for new entry */
1431
1432 KDASSERT(map != NULL);
1433 KDASSERT(first != NULL);
1434 KDASSERT(last != NULL);
1435 KDASSERT(dead != NULL);
1436 KDASSERT(sz > 0);
1437 KDASSERT(addr + sz > addr);
1438 KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr);
1439 KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz);
1440 KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz));
1441 uvm_tree_sanity(map, __FILE__, __LINE__);
1442
1443 min = addr + sz;
1444 max = VMMAP_FREE_END(last);
1445
1446 /* Initialize new entry. */
1447 if (new == NULL)
1448 entry = uvm_mapent_alloc(map, flags);
1449 else
1450 entry = new;
1451 if (entry == NULL)
1452 return NULL;
1453 entry->offset = 0;
1454 entry->etype = 0;
1455 entry->wired_count = 0;
1456 entry->aref.ar_pageoff = 0;
1457 entry->aref.ar_amap = NULL;
1458
1459 entry->start = addr;
1460 entry->end = min;
1461 entry->guard = 0;
1462 entry->fspace = 0;
1463
1464 vm_map_assert_wrlock(map);
1465
1466 /* Reset free space in first. */
1467 free = uvm_map_uaddr_e(map, first);
1468 uvm_mapent_free_remove(map, free, first);
1469 first->guard = 0;
1470 first->fspace = 0;
1471
1472 /*
1473 * Remove all entries that are fully replaced.
1474 * We are iterating using last in reverse order.
1475 */
1476 for (; first != last; last = prev) {
1477 prev = RBT_PREV(uvm_map_addr, last);
1478
1479 KDASSERT(last->start == last->end);
1480 free = uvm_map_uaddr_e(map, last);
1481 uvm_mapent_free_remove(map, free, last);
1482 uvm_mapent_addr_remove(map, last);
1483 DEAD_ENTRY_PUSH(dead, last);
1484 }
1485 /* Remove first if it is entirely inside <addr, addr+sz>. */
1486 if (first->start == addr) {
1487 uvm_mapent_addr_remove(map, first);
1488 DEAD_ENTRY_PUSH(dead, first);
1489 } else {
1490 uvm_map_fix_space(map, first, VMMAP_FREE_START(first),
1491 addr, flags);
1492 }
1493
1494 /* Finally, link in entry. */
1495 uvm_mapent_addr_insert(map, entry);
1496 uvm_map_fix_space(map, entry, min, max, flags);
1497
1498 uvm_tree_sanity(map, __FILE__, __LINE__);
1499 return entry;
1500 }
1501
1502
1503 /*
1504 * uvm_mapent_alloc: allocate a map entry
1505 */
1506 struct vm_map_entry *
1507 uvm_mapent_alloc(struct vm_map *map, int flags)
1508 {
1509 struct vm_map_entry *me, *ne;
1510 int pool_flags;
1511 int i;
1512
1513 pool_flags = PR_WAITOK;
1514 if (flags & UVM_FLAG_TRYLOCK)
1515 pool_flags = PR_NOWAIT;
1516
1517 if (map->flags & VM_MAP_INTRSAFE || cold) {
1518 mtx_enter(&uvm_kmapent_mtx);
1519 if (SLIST_EMPTY(&uvm.kentry_free)) {
1520 ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
1521 &kd_nowait);
1522 if (ne == NULL)
1523 panic("uvm_mapent_alloc: cannot allocate map "
1524 "entry");
1525 for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) {
1526 SLIST_INSERT_HEAD(&uvm.kentry_free,
1527 &ne[i], daddrs.addr_kentry);
1528 }
1529 if (ratecheck(&uvm_kmapent_last_warn_time,
1530 &uvm_kmapent_warn_rate))
1531 printf("uvm_mapent_alloc: out of static "
1532 "map entries\n");
1533 }
1534 me = SLIST_FIRST(&uvm.kentry_free);
1535 SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry);
1536 uvmexp.kmapent++;
1537 mtx_leave(&uvm_kmapent_mtx);
1538 me->flags = UVM_MAP_STATIC;
1539 } else if (map == kernel_map) {
1540 splassert(IPL_NONE);
1541 me = pool_get(&uvm_map_entry_kmem_pool, pool_flags);
1542 if (me == NULL)
1543 goto out;
1544 me->flags = UVM_MAP_KMEM;
1545 } else {
1546 splassert(IPL_NONE);
1547 me = pool_get(&uvm_map_entry_pool, pool_flags);
1548 if (me == NULL)
1549 goto out;
1550 me->flags = 0;
1551 }
1552
1553 RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF);
1554 out:
1555 return me;
1556 }
1557
1558 /*
1559 * uvm_mapent_free: free map entry
1560 *
1561 * => XXX: static pool for kernel map?
1562 */
1563 void
1564 uvm_mapent_free(struct vm_map_entry *me)
1565 {
1566 if (me->flags & UVM_MAP_STATIC) {
1567 mtx_enter(&uvm_kmapent_mtx);
1568 SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry);
1569 uvmexp.kmapent--;
1570 mtx_leave(&uvm_kmapent_mtx);
1571 } else if (me->flags & UVM_MAP_KMEM) {
1572 splassert(IPL_NONE);
1573 pool_put(&uvm_map_entry_kmem_pool, me);
1574 } else {
1575 splassert(IPL_NONE);
1576 pool_put(&uvm_map_entry_pool, me);
1577 }
1578 }
1579
1580 /*
1581 * uvm_map_lookup_entry: find map entry at or before an address.
1582 *
1583 * => map must at least be read-locked by caller
1584 * => entry is returned in "entry"
1585 * => return value is true if address is in the returned entry
1586 * ET_HOLE entries are considered to not contain a mapping, ergo FALSE is
1587 * returned for those mappings.
1588 */
1589 boolean_t
1590 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
1591 struct vm_map_entry **entry)
1592 {
1593 vm_map_assert_anylock(map);
1594
1595 *entry = uvm_map_entrybyaddr(&map->addr, address);
1596 return *entry != NULL && !UVM_ET_ISHOLE(*entry) &&
1597 (*entry)->start <= address && (*entry)->end > address;
1598 }
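/*
 * Editor's note: a read-locked lookup sketch (not part of the original
 * file), following the pattern used by uvm_map_inentry_fix() below.  The
 * function name is hypothetical.
 */
#if 0	/* illustrative only */
static int
example_addr_is_mapped(struct vm_map *map, vaddr_t addr)
{
	struct vm_map_entry *entry;
	boolean_t found;

	vm_map_lock_read(map);
	found = uvm_map_lookup_entry(map, trunc_page(addr), &entry);
	/* On success: entry->start <= trunc_page(addr) < entry->end. */
	vm_map_unlock_read(map);

	return found == TRUE;
}
#endif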
1599
1600 /*
1601 * Stack must be in a MAP_STACK entry. PROT_NONE indicates stack not yet
1602 * grown -- then uvm_map_check_region_range() should not cache the entry
1603 * because growth won't be seen.
1604 */
1605 int
1606 uvm_map_inentry_sp(vm_map_entry_t entry)
1607 {
1608 if ((entry->etype & UVM_ET_STACK) == 0) {
1609 if (entry->protection == PROT_NONE)
1610 return (-1); /* don't update range */
1611 return (0);
1612 }
1613 return (1);
1614 }
1615
1616 /*
 1617 	 * The system call must not come from a writeable entry; that would
 1618 	 * violate W^X.  (Would be nice if we could spot aliasing, which is also kind of bad.)
1619 *
 1620 	 * The system call must come from a syscall-labeled entry (these are
1621 * the text regions of the main program, sigtramp, ld.so, or libc).
1622 */
1623 int
1624 uvm_map_inentry_pc(vm_map_entry_t entry)
1625 {
1626 if (entry->protection & PROT_WRITE)
1627 return (0); /* not permitted */
1628 if ((entry->etype & UVM_ET_SYSCALL) == 0)
1629 return (0); /* not permitted */
1630 return (1);
1631 }
1632
1633 int
1634 uvm_map_inentry_recheck(u_long serial, vaddr_t addr, struct p_inentry *ie)
1635 {
1636 return (serial != ie->ie_serial || ie->ie_start == 0 ||
1637 addr < ie->ie_start || addr >= ie->ie_end);
1638 }
1639
1640 /*
1641 * Inside a vm_map find the reg address and verify it via function.
1642 * Remember low and high addresses of region if valid and return TRUE,
1643 * else return FALSE.
1644 */
1645 boolean_t
1646 uvm_map_inentry_fix(struct proc *p, struct p_inentry *ie, vaddr_t addr,
1647 int (*fn)(vm_map_entry_t), u_long serial)
1648 {
1649 vm_map_t map = &p->p_vmspace->vm_map;
1650 vm_map_entry_t entry;
1651 int ret;
1652
1653 if (addr < map->min_offset || addr >= map->max_offset)
1654 return (FALSE);
1655
1656 /* lock map */
1657 vm_map_lock_read(map);
1658
1659 /* lookup */
1660 if (!uvm_map_lookup_entry(map, trunc_page(addr), &entry)) {
1661 vm_map_unlock_read(map);
1662 return (FALSE);
1663 }
1664
1665 ret = (*fn)(entry);
1666 if (ret == 0) {
1667 vm_map_unlock_read(map);
1668 return (FALSE);
1669 } else if (ret == 1) {
1670 ie->ie_start = entry->start;
1671 ie->ie_end = entry->end;
1672 ie->ie_serial = serial;
1673 } else {
1674 /* do not update, re-check later */
1675 }
1676 vm_map_unlock_read(map);
1677 return (TRUE);
1678 }
1679
1680 boolean_t
1681 uvm_map_inentry(struct proc *p, struct p_inentry *ie, vaddr_t addr,
1682 const char *fmt, int (*fn)(vm_map_entry_t), u_long serial)
1683 {
1684 union sigval sv;
1685 boolean_t ok = TRUE;
1686
1687 if (uvm_map_inentry_recheck(serial, addr, ie)) {
1688 ok = uvm_map_inentry_fix(p, ie, addr, fn, serial);
1689 if (!ok) {
1690 KERNEL_LOCK();
1691 printf(fmt, p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid,
1692 addr, ie->ie_start, ie->ie_end-1);
1693 p->p_p->ps_acflag |= AMAP;
1694 sv.sival_ptr = (void *)PROC_PC(p);
1695 trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv);
1696 KERNEL_UNLOCK();
1697 }
1698 }
1699 return (ok);
1700 }
1701
1702 /*
1703 * Check whether the given address range can be converted to a MAP_STACK
1704 * mapping.
1705 *
1706 * Must be called with map locked.
1707 */
1708 boolean_t
1709 uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz,
1710 int sigaltstack_check)
1711 {
1712 vaddr_t end = addr + sz;
1713 struct vm_map_entry *first, *iter, *prev = NULL;
1714
1715 vm_map_assert_anylock(map);
1716
1717 if (!uvm_map_lookup_entry(map, addr, &first)) {
1718 printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n",
1719 addr, end, map);
1720 return FALSE;
1721 }
1722
1723 /*
1724 * Check that the address range exists and is contiguous.
1725 */
1726 for (iter = first; iter != NULL && iter->start < end;
1727 prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) {
1728 /*
1729 * Make sure that we do not have holes in the range.
1730 */
1731 #if 0
1732 if (prev != NULL) {
1733 printf("prev->start 0x%lx, prev->end 0x%lx, "
1734 "iter->start 0x%lx, iter->end 0x%lx\n",
1735 prev->start, prev->end, iter->start, iter->end);
1736 }
1737 #endif
1738
1739 if (prev != NULL && prev->end != iter->start) {
1740 printf("map stack 0x%lx-0x%lx of map %p failed: "
1741 "hole in range\n", addr, end, map);
1742 return FALSE;
1743 }
1744 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) {
1745 printf("map stack 0x%lx-0x%lx of map %p failed: "
1746 "hole in range\n", addr, end, map);
1747 return FALSE;
1748 }
1749 if (sigaltstack_check) {
1750 if ((iter->etype & UVM_ET_SYSCALL))
1751 return FALSE;
1752 if (iter->protection != (PROT_READ | PROT_WRITE))
1753 return FALSE;
1754 }
1755 }
1756
1757 return TRUE;
1758 }
1759
1760 /*
1761 * Remap the middle-pages of an existing mapping as a stack range.
1762 * If there exists a previous contiguous mapping with the given range
1763 * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the
1764 * mapping is dropped, and a new anon mapping is created and marked as
1765 * a stack.
1766 *
1767 * Must be called with map unlocked.
1768 */
1769 int
1770 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz)
1771 {
1772 vm_map_t map = &p->p_vmspace->vm_map;
1773 vaddr_t start, end;
1774 int error;
1775 int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
1776 PROT_READ | PROT_WRITE | PROT_EXEC,
1777 MAP_INHERIT_COPY, MADV_NORMAL,
1778 UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP |
1779 UVM_FLAG_COPYONW | UVM_FLAG_SIGALTSTACK);
1780
1781 start = round_page(addr);
1782 end = trunc_page(addr + sz);
1783 #ifdef MACHINE_STACK_GROWS_UP
1784 if (end == addr + sz)
1785 end -= PAGE_SIZE;
1786 #else
1787 if (start == addr)
1788 start += PAGE_SIZE;
1789 #endif
1790
1791 if (start < map->min_offset || end >= map->max_offset || end < start)
1792 return EINVAL;
1793
1794 /*
 1795 	 * UVM_FLAG_SIGALTSTACK indicates that immutability may be bypassed,
 1796 	 * but the range is checked to be contiguous, not a syscall mapping,
 1797 	 * and protected RW.  Then, a new mapping (all zero) is
1798 * placed upon the region, which prevents an attacker from pivoting
1799 * into pre-placed MAP_STACK space.
1800 */
1801 error = uvm_mapanon(map, &start, end - start, 0, flags);
1802 if (error != 0)
1803 printf("map stack for pid %d failed\n", p->p_p->ps_pid);
1804
1805 return error;
1806 }
1807
1808 /*
1809 * uvm_map_pie: return a random load address for a PIE executable
1810 * properly aligned.
1811 */
1812 #ifndef VM_PIE_MAX_ADDR
1813 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4)
1814 #endif
1815
1816 #ifndef VM_PIE_MIN_ADDR
1817 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS
1818 #endif
1819
1820 #ifndef VM_PIE_MIN_ALIGN
1821 #define VM_PIE_MIN_ALIGN PAGE_SIZE
1822 #endif
1823
1824 vaddr_t
1825 uvm_map_pie(vaddr_t align)
1826 {
1827 vaddr_t addr, space, min;
1828
1829 align = MAX(align, VM_PIE_MIN_ALIGN);
1830
1831 /* round up to next alignment */
1832 min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1);
1833
1834 if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR)
1835 return (align);
1836
1837 space = (VM_PIE_MAX_ADDR - min) / align;
1838 space = MIN(space, (u_int32_t)-1);
1839
1840 addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align;
1841 addr += min;
1842
1843 return (addr);
1844 }
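/*
 * Editor's note (illustrative, not part of the original file): with
 * hypothetical values VM_PIE_MIN_ADDR = 0, VM_PIE_MAX_ADDR = 2^30 and
 * align = 2^16, min rounds up to 0, space = 2^30 / 2^16 = 16384 slots,
 * and the returned address is a random multiple of 64 KiB below 1 GiB.
 */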
1845
1846 void
1847 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end)
1848 {
1849 struct uvm_map_deadq dead;
1850
1851 KASSERT((start & (vaddr_t)PAGE_MASK) == 0 &&
1852 (end & (vaddr_t)PAGE_MASK) == 0);
1853 TAILQ_INIT(&dead);
1854 vm_map_lock(map);
1855 uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE, FALSE);
1856 vm_map_unlock(map);
1857
1858 if (map->flags & VM_MAP_INTRSAFE)
1859 uvm_unmap_detach_intrsafe(&dead);
1860 else
1861 uvm_unmap_detach(&dead, 0);
1862 }
1863
1864 /*
1865 * Mark entry as free.
1866 *
1867 * entry will be put on the dead list.
1868 * The free space will be merged into the previous or a new entry,
1869 * unless markfree is false.
1870 */
1871 void
1872 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry,
1873 struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead,
1874 boolean_t markfree)
1875 {
1876 struct uvm_addr_state *free;
1877 struct vm_map_entry *prev;
1878 vaddr_t addr; /* Start of freed range. */
1879 vaddr_t end; /* End of freed range. */
1880
1881 UVM_MAP_REQ_WRITE(map);
1882
1883 prev = *prev_ptr;
1884 if (prev == entry)
1885 *prev_ptr = prev = NULL;
1886
1887 if (prev == NULL ||
1888 VMMAP_FREE_END(prev) != entry->start)
1889 prev = RBT_PREV(uvm_map_addr, entry);
1890
1891 /* Entry is describing only free memory and has nothing to drain into. */
1892 if (prev == NULL && entry->start == entry->end && markfree) {
1893 *prev_ptr = entry;
1894 return;
1895 }
1896
1897 addr = entry->start;
1898 end = VMMAP_FREE_END(entry);
1899 free = uvm_map_uaddr_e(map, entry);
1900 uvm_mapent_free_remove(map, free, entry);
1901 uvm_mapent_addr_remove(map, entry);
1902 DEAD_ENTRY_PUSH(dead, entry);
1903
1904 if (markfree) {
1905 if (prev) {
1906 free = uvm_map_uaddr_e(map, prev);
1907 uvm_mapent_free_remove(map, free, prev);
1908 }
1909 *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0);
1910 }
1911 }
1912
1913 /*
1914 * Unwire and release referenced amap and object from map entry.
1915 */
1916 void
1917 uvm_unmap_kill_entry_withlock(struct vm_map *map, struct vm_map_entry *entry,
1918 int needlock)
1919 {
1920 /* Unwire removed map entry. */
1921 if (VM_MAPENT_ISWIRED(entry)) {
1922 KERNEL_LOCK();
1923 entry->wired_count = 0;
1924 uvm_fault_unwire_locked(map, entry->start, entry->end);
1925 KERNEL_UNLOCK();
1926 }
1927
1928 if (needlock)
1929 uvm_map_lock_entry(entry);
1930
1931 /* Entry-type specific code. */
1932 if (UVM_ET_ISHOLE(entry)) {
1933 /* Nothing to be done for holes. */
1934 } else if (map->flags & VM_MAP_INTRSAFE) {
1935 KASSERT(vm_map_pmap(map) == pmap_kernel());
1936
1937 uvm_km_pgremove_intrsafe(entry->start, entry->end);
1938 } else if (UVM_ET_ISOBJ(entry) &&
1939 UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
1940 KASSERT(vm_map_pmap(map) == pmap_kernel());
1941 /*
1942 * Note: kernel object mappings are currently used in
1943 * two ways:
1944 * [1] "normal" mappings of pages in the kernel object
1945 * [2] uvm_km_valloc'd allocations in which we
1946 * pmap_enter in some non-kernel-object page
1947 * (e.g. vmapbuf).
1948 *
1949 * for case [1], we need to remove the mapping from
1950 * the pmap and then remove the page from the kernel
1951 * object (because, once pages in a kernel object are
1952 * unmapped they are no longer needed, unlike, say,
1953 * a vnode where you might want the data to persist
1954 * until flushed out of a queue).
1955 *
1956 * for case [2], we need to remove the mapping from
1957 * the pmap. there shouldn't be any pages at the
1958 * specified offset in the kernel object [but it
1959 * doesn't hurt to call uvm_km_pgremove just to be
1960 * safe?]
1961 *
1962 * uvm_km_pgremove currently does the following:
1963 * for pages in the kernel object range:
1964 * - drops the swap slot
1965 * - uvm_pagefree the page
1966 *
1967 * note there is a version of uvm_km_pgremove() that
1968 * is used for "intrsafe" objects.
1969 */
1970 /*
1971 * remove mappings from pmap and drop the pages
1972 * from the object. offsets are always relative
1973 * to vm_map_min(kernel_map).
1974 */
1975 uvm_km_pgremove(entry->object.uvm_obj, entry->start,
1976 entry->end);
1977 } else {
1978 /* remove mappings the standard way. */
1979 pmap_remove(map->pmap, entry->start, entry->end);
1980 }
1981
1982 if (needlock)
1983 uvm_map_unlock_entry(entry);
1984 }
1985
1986 void
1987 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
1988 {
1989 uvm_unmap_kill_entry_withlock(map, entry, 0);
1990 }
1991
1992 /*
1993 * Remove all entries from start to end.
1994 *
1995 * If remove_holes, then remove ET_HOLE entries as well.
1996 * If markfree, the entry will be properly marked free; otherwise no
1997 * replacement entry will be put in the tree (corrupting the tree).
1998 */
1999 int
2000 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
2001 struct uvm_map_deadq *dead, boolean_t remove_holes,
2002 boolean_t markfree, boolean_t checkimmutable)
2003 {
2004 struct vm_map_entry *prev_hint, *next, *entry;
2005
2006 start = MAX(start, map->min_offset);
2007 end = MIN(end, map->max_offset);
2008 if (start >= end)
2009 return 0;
2010
2011 vm_map_assert_wrlock(map);
2012
2013 /* Find first affected entry. */
2014 entry = uvm_map_entrybyaddr(&map->addr, start);
2015 KDASSERT(entry != NULL && entry->start <= start);
2016
2017 if (checkimmutable) {
2018 struct vm_map_entry *entry1 = entry;
2019
2020 /* Refuse to unmap if any entries are immutable */
2021 if (entry1->end <= start)
2022 entry1 = RBT_NEXT(uvm_map_addr, entry1);
2023 for (; entry1 != NULL && entry1->start < end; entry1 = next) {
2024 KDASSERT(entry1->start >= start);
2025 next = RBT_NEXT(uvm_map_addr, entry1);
2026 /* Treat memory holes as free space. */
2027 if (entry1->start == entry1->end || UVM_ET_ISHOLE(entry1))
2028 continue;
2029 if (entry1->etype & UVM_ET_IMMUTABLE)
2030 return EPERM;
2031 }
2032 }
2033
2034 if (entry->end <= start && markfree)
2035 entry = RBT_NEXT(uvm_map_addr, entry);
2036 else
2037 UVM_MAP_CLIP_START(map, entry, start);
2038
2039 /*
2040 * Iterate entries until we reach end address.
2041 * prev_hint hints where the freed space can be appended to.
2042 */
2043 prev_hint = NULL;
2044 for (; entry != NULL && entry->start < end; entry = next) {
2045 KDASSERT(entry->start >= start);
2046 if (entry->end > end || !markfree)
2047 UVM_MAP_CLIP_END(map, entry, end);
2048 KDASSERT(entry->start >= start && entry->end <= end);
2049 next = RBT_NEXT(uvm_map_addr, entry);
2050
2051 /* Don't remove holes unless asked to do so. */
2052 if (UVM_ET_ISHOLE(entry)) {
2053 if (!remove_holes) {
2054 prev_hint = entry;
2055 continue;
2056 }
2057 }
2058
2059 /* A stack has been removed. */
2060 if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE))
2061 map->sserial++;
2062
2063 /* Kill entry. */
2064 uvm_unmap_kill_entry_withlock(map, entry, 1);
2065
2066 /* Update space usage. */
2067 if ((map->flags & VM_MAP_ISVMSPACE) &&
2068 entry->object.uvm_obj == NULL &&
2069 entry->protection != PROT_NONE &&
2070 !UVM_ET_ISHOLE(entry)) {
2071 ((struct vmspace *)map)->vm_dused -=
2072 uvmspace_dused(map, entry->start, entry->end);
2073 }
2074 if (!UVM_ET_ISHOLE(entry))
2075 map->size -= entry->end - entry->start;
2076
2077 /* Actual removal of entry. */
2078 uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree);
2079 }
2080
2081 pmap_update(vm_map_pmap(map));
2082
2083 #ifdef VMMAP_DEBUG
2084 if (markfree) {
2085 for (entry = uvm_map_entrybyaddr(&map->addr, start);
2086 entry != NULL && entry->start < end;
2087 entry = RBT_NEXT(uvm_map_addr, entry)) {
2088 KDASSERT(entry->end <= start ||
2089 entry->start == entry->end ||
2090 UVM_ET_ISHOLE(entry));
2091 }
2092 } else {
2093 vaddr_t a;
2094 for (a = start; a < end; a += PAGE_SIZE)
2095 KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL);
2096 }
2097 #endif
2098 return 0;
2099 }
2100
2101 /*
2102 * Mark all entries from first until end (exclusive) as pageable.
2103 *
2104 * Lock must be exclusive on entry and will not be touched.
2105 */
2106 void
2107 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first,
2108 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr)
2109 {
2110 struct vm_map_entry *iter;
2111
2112 for (iter = first; iter != end;
2113 iter = RBT_NEXT(uvm_map_addr, iter)) {
2114 KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2115 if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2116 continue;
2117
2118 iter->wired_count = 0;
2119 uvm_fault_unwire_locked(map, iter->start, iter->end);
2120 }
2121 }
2122
2123 /*
2124 * Mark all entries from first until end (exclusive) as wired.
2125 *
2126 * Lockflags determines the lock state on return from this function.
2127 * Lock must be exclusive on entry.
2128 */
2129 int
2130 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first,
2131 struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr,
2132 int lockflags)
2133 {
2134 struct vm_map_entry *iter;
2135 #ifdef DIAGNOSTIC
2136 unsigned int timestamp_save;
2137 #endif
2138 int error;
2139
2140 /*
2141 * Wire pages in two passes:
2142 *
2143 * 1: holding the write lock, we create any anonymous maps that need
2144 * to be created. then we clip each map entry to the region to
2145 * be wired and increment its wiring count.
2146 *
2147 * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
2148 * in the pages for any newly wired area (wired_count == 1).
2149 *
2150 * downgrading to a read lock for uvm_fault_wire avoids a possible
2151 * deadlock with another thread that may have faulted on one of
2152 * the pages to be wired (it would mark the page busy, blocking
2153 * us, then in turn block on the map lock that we hold).
2154 * because we keep the read lock on the map, the copy-on-write
2155 * status of the entries we modify here cannot change.
2156 */
2157 for (iter = first; iter != end;
2158 iter = RBT_NEXT(uvm_map_addr, iter)) {
2159 KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
2160 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2161 iter->protection == PROT_NONE)
2162 continue;
2163
2164 /*
2165 * Perform actions of vm_map_lookup that need the write lock.
2166 * - create an anonymous map for copy-on-write
2167 * - anonymous map for zero-fill
2168 * Skip submaps.
2169 */
2170 if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) &&
2171 UVM_ET_ISNEEDSCOPY(iter) &&
2172 ((iter->protection & PROT_WRITE) ||
2173 iter->object.uvm_obj == NULL)) {
2174 amap_copy(map, iter, M_WAITOK,
2175 UVM_ET_ISSTACK(iter) ? FALSE : TRUE,
2176 iter->start, iter->end);
2177 }
2178 iter->wired_count++;
2179 }
2180
2181 /*
2182 * Pass 2.
2183 */
2184 #ifdef DIAGNOSTIC
2185 timestamp_save = map->timestamp;
2186 #endif
2187 vm_map_busy(map);
2188 vm_map_downgrade(map);
2189
2190 error = 0;
2191 for (iter = first; error == 0 && iter != end;
2192 iter = RBT_NEXT(uvm_map_addr, iter)) {
2193 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2194 iter->protection == PROT_NONE)
2195 continue;
2196
2197 error = uvm_fault_wire(map, iter->start, iter->end,
2198 iter->protection);
2199 }
2200
2201 if (error) {
2202 /*
2203 * uvm_fault_wire failure
2204 *
2205 * Reacquire lock and undo our work.
2206 */
2207 vm_map_upgrade(map);
2208 vm_map_unbusy(map);
2209 #ifdef DIAGNOSTIC
2210 if (timestamp_save != map->timestamp)
2211 panic("uvm_map_pageable_wire: stale map");
2212 #endif
2213
2214 /*
2215 * first is no longer needed to restart loops.
2216 * Use it as an iterator to unwire the entries that were
2217 * successfully wired.
2217 */
2218 for (; first != iter;
2219 first = RBT_NEXT(uvm_map_addr, first)) {
2220 if (UVM_ET_ISHOLE(first) ||
2221 first->start == first->end ||
2222 first->protection == PROT_NONE)
2223 continue;
2224
2225 first->wired_count--;
2226 if (!VM_MAPENT_ISWIRED(first)) {
2227 uvm_fault_unwire_locked(map,
2228 first->start, first->end);
2229 }
2230 }
2231
2232 /* decrease counter in the rest of the entries */
2233 for (; iter != end;
2234 iter = RBT_NEXT(uvm_map_addr, iter)) {
2235 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
2236 iter->protection == PROT_NONE)
2237 continue;
2238
2239 iter->wired_count--;
2240 }
2241
2242 if ((lockflags & UVM_LK_EXIT) == 0)
2243 vm_map_unlock(map);
2244 return error;
2245 }
2246
2247 /* We are currently holding a read lock. */
2248 if ((lockflags & UVM_LK_EXIT) == 0) {
2249 vm_map_unbusy(map);
2250 vm_map_unlock_read(map);
2251 } else {
2252 vm_map_upgrade(map);
2253 vm_map_unbusy(map);
2254 #ifdef DIAGNOSTIC
2255 if (timestamp_save != map->timestamp)
2256 panic("uvm_map_pageable_wire: stale map");
2257 #endif
2258 }
2259 return 0;
2260 }
2261
2262 /*
2263 * uvm_map_pageable: set pageability of a range in a map.
2264 *
2265 * Flags:
2266 * UVM_LK_ENTER: map is already locked by caller
2267 * UVM_LK_EXIT: don't unlock map on exit
2268 *
2269 * The full range must be in use (entries may not have fspace != 0).
2270 * UVM_ET_HOLE counts as unmapped.
2271 */
2272 int
2273 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
2274 boolean_t new_pageable, int lockflags)
2275 {
2276 struct vm_map_entry *first, *last, *tmp;
2277 int error;
2278
2279 start = trunc_page(start);
2280 end = round_page(end);
2281
2282 if (start > end)
2283 return EINVAL;
2284 if (start == end)
2285 return 0; /* nothing to do */
2286 if (start < map->min_offset)
2287 return EFAULT; /* why? see first XXX below */
2288 if (end > map->max_offset)
2289 return EINVAL; /* why? see second XXX below */
2290
2291 KASSERT(map->flags & VM_MAP_PAGEABLE);
2292 if ((lockflags & UVM_LK_ENTER) == 0)
2293 vm_map_lock(map);
2294
2295 /*
2296 * Find first entry.
2297 *
2298 * Initial test on start is different, because of the different
2299 * error returned. Rest is tested further down.
2300 */
2301 first = uvm_map_entrybyaddr(&map->addr, start);
2302 if (first->end <= start || UVM_ET_ISHOLE(first)) {
2303 /*
2304 * XXX if the first address is not mapped, it is EFAULT?
2305 */
2306 error = EFAULT;
2307 goto out;
2308 }
2309
2310 /* Check that the range has no holes. */
2311 for (last = first; last != NULL && last->start < end;
2312 last = RBT_NEXT(uvm_map_addr, last)) {
2313 if (UVM_ET_ISHOLE(last) ||
2314 (last->end < end && VMMAP_FREE_END(last) != last->end)) {
2315 /*
2316 * XXX unmapped memory in range, why is it EINVAL
2317 * instead of EFAULT?
2318 */
2319 error = EINVAL;
2320 goto out;
2321 }
2322 }
2323
2324 /*
2325 * Last ended at the first entry after the range.
2326 * Move back one step.
2327 *
2328 * Note that last may be NULL.
2329 */
2330 if (last == NULL) {
2331 last = RBT_MAX(uvm_map_addr, &map->addr);
2332 if (last->end < end) {
2333 error = EINVAL;
2334 goto out;
2335 }
2336 } else {
2337 KASSERT(last != first);
2338 last = RBT_PREV(uvm_map_addr, last);
2339 }
2340
2341 /* Wire/unwire pages here. */
2342 if (new_pageable) {
2343 /*
2344 * Mark pageable.
2345 * entries that are not wired are untouched.
2346 */
2347 if (VM_MAPENT_ISWIRED(first))
2348 UVM_MAP_CLIP_START(map, first, start);
2349 /*
2350 * Split last at end.
2351 * Make tmp be the first entry after what is to be touched.
2352 * If last is not wired, don't touch it.
2353 */
2354 if (VM_MAPENT_ISWIRED(last)) {
2355 UVM_MAP_CLIP_END(map, last, end);
2356 tmp = RBT_NEXT(uvm_map_addr, last);
2357 } else
2358 tmp = last;
2359
2360 uvm_map_pageable_pgon(map, first, tmp, start, end);
2361 error = 0;
2362
2363 out:
2364 if ((lockflags & UVM_LK_EXIT) == 0)
2365 vm_map_unlock(map);
2366 return error;
2367 } else {
2368 /*
2369 * Mark entries wired.
2370 * entries are always touched (because recovery needs this).
2371 */
2372 if (!VM_MAPENT_ISWIRED(first))
2373 UVM_MAP_CLIP_START(map, first, start);
2374 /*
2375 * Split last at end.
2376 * Make tmp be the first entry after what is to be touched.
2377 * If last is not wired, don't touch it.
2378 */
2379 if (!VM_MAPENT_ISWIRED(last)) {
2380 UVM_MAP_CLIP_END(map, last, end);
2381 tmp = RBT_NEXT(uvm_map_addr, last);
2382 } else
2383 tmp = last;
2384
2385 return uvm_map_pageable_wire(map, first, tmp, start, end,
2386 lockflags);
2387 }
2388 }
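
/*
 * Hypothetical example caller (a sketch; example_wire_range and its
 * arguments are placeholders, not names used elsewhere): wire a range
 * of a pageable map into memory, then make it pageable again.
 */
#if 0
static int
example_wire_range(struct vm_map *map, vaddr_t uaddr, vsize_t len)
{
	int error;

	/* Wire [uaddr, uaddr + len): new_pageable == FALSE. */
	error = uvm_map_pageable(map, uaddr, uaddr + len, FALSE, 0);
	if (error != 0)
		return error;

	/* ... the range is now resident and wired ... */

	/* Unwire it again: new_pageable == TRUE. */
	return uvm_map_pageable(map, uaddr, uaddr + len, TRUE, 0);
}
#endif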
2389
2390 /*
2391 * uvm_map_pageable_all: special case of uvm_map_pageable - affects
2392 * all mapped regions.
2393 *
2394 * Map must not be locked.
2395 * If no flags are specified, all regions are unwired.
2396 */
2397 int
2398 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
2399 {
2400 vsize_t size;
2401 struct vm_map_entry *iter;
2402
2403 KASSERT(map->flags & VM_MAP_PAGEABLE);
2404 vm_map_lock(map);
2405
2406 if (flags == 0) {
2407 uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr),
2408 NULL, map->min_offset, map->max_offset);
2409
2410 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
2411 vm_map_unlock(map);
2412 return 0;
2413 }
2414
2415 if (flags & MCL_FUTURE)
2416 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0);
2417 if (!(flags & MCL_CURRENT)) {
2418 vm_map_unlock(map);
2419 return 0;
2420 }
2421
2422 /*
2423 * Count number of pages in all non-wired entries.
2424 * If the number exceeds the limit, abort.
2425 */
2426 size = 0;
2427 RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2428 if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
2429 continue;
2430
2431 size += iter->end - iter->start;
2432 }
2433
2434 if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
2435 vm_map_unlock(map);
2436 return ENOMEM;
2437 }
2438
2439 /* XXX non-pmap_wired_count case must be handled by caller */
2440 #ifdef pmap_wired_count
2441 if (limit != 0 &&
2442 size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) {
2443 vm_map_unlock(map);
2444 return ENOMEM;
2445 }
2446 #endif
2447
2448 /*
2449 * uvm_map_pageable_wire will release lock
2450 */
2451 return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr),
2452 NULL, map->min_offset, map->max_offset, 0);
2453 }
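
/*
 * Hypothetical example caller (a sketch in the style of mlockall(2);
 * example_lock_all and lim are placeholders): wire everything that is
 * currently mapped and ask for future mappings to be wired as well.
 * Passing flags == 0 instead unwires every region.
 */
#if 0
static int
example_lock_all(struct vm_map *map, vsize_t lim)
{
	return uvm_map_pageable_all(map, MCL_CURRENT | MCL_FUTURE, lim);
}
#endif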
2454
2455 /*
2456 * Initialize map.
2457 *
2458 * Allocates sufficient entries to describe the free memory in the map.
2459 */
2460 void
2461 uvm_map_setup(struct vm_map *map, pmap_t pmap, vaddr_t min, vaddr_t max,
2462 int flags)
2463 {
2464 int i;
2465
2466 KASSERT((min & (vaddr_t)PAGE_MASK) == 0);
2467 KASSERT((max & (vaddr_t)PAGE_MASK) == 0 ||
2468 (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
2469
2470 /*
2471 * Update parameters.
2472 *
2473 * This code handles (vaddr_t)-1 and other page mask ending addresses
2474 * properly.
2475 * We lose the top page if the full virtual address space is used.
2476 */
2477 if (max & (vaddr_t)PAGE_MASK) {
2478 max += 1;
2479 if (max == 0) /* overflow */
2480 max -= PAGE_SIZE;
2481 }
2482
2483 RBT_INIT(uvm_map_addr, &map->addr);
2484 map->uaddr_exe = NULL;
2485 for (i = 0; i < nitems(map->uaddr_any); ++i)
2486 map->uaddr_any[i] = NULL;
2487 map->uaddr_brk_stack = NULL;
2488
2489 map->pmap = pmap;
2490 map->size = 0;
2491 map->ref_count = 0;
2492 map->min_offset = min;
2493 map->max_offset = max;
2494 map->b_start = map->b_end = 0; /* Empty brk() area by default. */
2495 map->s_start = map->s_end = 0; /* Empty stack area by default. */
2496 map->flags = flags;
2497 map->timestamp = 0;
2498 if (flags & VM_MAP_ISVMSPACE)
2499 rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK);
2500 else
2501 rw_init(&map->lock, "kmmaplk");
2502 mtx_init(&map->mtx, IPL_VM);
2503 mtx_init(&map->flags_lock, IPL_VM);
2504
2505 /* Configure the allocators. */
2506 if (flags & VM_MAP_ISVMSPACE)
2507 uvm_map_setup_md(map);
2508 else
2509 map->uaddr_any[3] = &uaddr_kbootstrap;
2510
2511 /*
2512 * Fill map entries.
2513 * We do not need to write-lock the map here because only the current
2514 * thread sees it right now. Initialize ref_count to 0 above to avoid
2515 * bogus triggering of lock-not-held assertions.
2516 */
2517 uvm_map_setup_entries(map);
2518 uvm_tree_sanity(map, __FILE__, __LINE__);
2519 map->ref_count = 1;
2520 }
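
/*
 * Worked example of the max_offset fixup above (illustrative): a
 * caller passing max = (vaddr_t)-1, which ends in PAGE_MASK, gets
 * max + 1 == 0; the overflow is detected and max becomes 0 - PAGE_SIZE,
 * so the topmost page of the address space is deliberately left out of
 * the map.
 */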
2521
2522 /*
2523 * Destroy the map.
2524 *
2525 * This is the inverse operation to uvm_map_setup.
2526 */
2527 void
2528 uvm_map_teardown(struct vm_map *map)
2529 {
2530 struct uvm_map_deadq dead_entries;
2531 struct vm_map_entry *entry, *tmp;
2532 #ifdef VMMAP_DEBUG
2533 size_t numq, numt;
2534 #endif
2535 int i;
2536
2537 KERNEL_ASSERT_LOCKED();
2538 KERNEL_UNLOCK();
2539 KERNEL_ASSERT_UNLOCKED();
2540
2541 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
2542
2543 vm_map_lock(map);
2544
2545 /* Remove address selectors. */
2546 uvm_addr_destroy(map->uaddr_exe);
2547 map->uaddr_exe = NULL;
2548 for (i = 0; i < nitems(map->uaddr_any); i++) {
2549 uvm_addr_destroy(map->uaddr_any[i]);
2550 map->uaddr_any[i] = NULL;
2551 }
2552 uvm_addr_destroy(map->uaddr_brk_stack);
2553 map->uaddr_brk_stack = NULL;
2554
2555 /*
2556 * Remove entries.
2557 *
2558 * The following is based on graph breadth-first search.
2559 *
2560 * In color terms:
2561 * - the dead_entries set contains all nodes that are reachable
2562 * (i.e. both the black and the grey nodes)
2563 * - any entry not in dead_entries is white
2564 * - any entry that appears in dead_entries before entry
2565 * is black, the rest is grey.
2566 * The set [entry, end] is also referred to as the wavefront.
2567 *
2568 * Since the tree is always a fully connected graph, the breadth-first
2569 * search guarantees that each vmmap_entry is visited exactly once.
2570 * The vm_map is broken down in linear time.
2571 */
2572 TAILQ_INIT(&dead_entries);
2573 if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL)
2574 DEAD_ENTRY_PUSH(&dead_entries, entry);
2575 while (entry != NULL) {
2576 sched_pause(yield);
2577 uvm_unmap_kill_entry(map, entry);
2578 if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL)
2579 DEAD_ENTRY_PUSH(&dead_entries, tmp);
2580 if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
2581 DEAD_ENTRY_PUSH(&dead_entries, tmp);
2582 /* Update wave-front. */
2583 entry = TAILQ_NEXT(entry, dfree.deadq);
2584 }
2585
2586 vm_map_unlock(map);
2587
2588 #ifdef VMMAP_DEBUG
2589 numt = numq = 0;
2590 RBT_FOREACH(entry, uvm_map_addr, &map->addr)
2591 numt++;
2592 TAILQ_FOREACH(entry, &dead_entries, dfree.deadq)
2593 numq++;
2594 KASSERT(numt == numq);
2595 #endif
2596 uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK);
2597
2598 KERNEL_LOCK();
2599
2600 pmap_destroy(map->pmap);
2601 map->pmap = NULL;
2602 }
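
/*
 * Illustrative walk (a three-entry tree with root R and children A
 * and B): R is pushed onto dead_entries first; killing R pushes A and
 * B behind it, and the wavefront pointer simply follows the queue in
 * insertion order (R, A, B).  Every entry is visited exactly once,
 * nothing is unlinked from the address tree during the walk, and the
 * entries are released in bulk by uvm_unmap_detach() afterwards.
 */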
2603
2604 /*
2605 * Populate map with free-memory entries.
2606 *
2607 * Map must be initialized and empty.
2608 */
2609 void
2610 uvm_map_setup_entries(struct vm_map *map)
2611 {
2612 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
2613
2614 uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0);
2615 }
2616
2617 /*
2618 * Split entry at given address.
2619 *
2620 * orig: entry that is to be split.
2621 * next: a newly allocated map entry that is not linked.
2622 * split: address at which the split is done.
2623 */
2624 void
2625 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig,
2626 struct vm_map_entry *next, vaddr_t split)
2627 {
2628 struct uvm_addr_state *free, *free_before;
2629 vsize_t adj;
2630
2631 if ((split & PAGE_MASK) != 0) {
2632 panic("uvm_map_splitentry: split address 0x%lx "
2633 "not on page boundary!", split);
2634 }
2635 KDASSERT(map != NULL && orig != NULL && next != NULL);
2636 uvm_tree_sanity(map, __FILE__, __LINE__);
2637 KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split);
2638
2639 #ifdef VMMAP_DEBUG
2640 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig);
2641 KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next);
2642 #endif /* VMMAP_DEBUG */
2643
2644 /*
2645 * Free space will change, unlink from free space tree.
2646 */
2647 free = uvm_map_uaddr_e(map, orig);
2648 uvm_mapent_free_remove(map, free, orig);
2649
2650 adj = split - orig->start;
2651
2652 uvm_mapent_copy(orig, next);
2653 if (split >= orig->end) {
2654 next->etype = 0;
2655 next->offset = 0;
2656 next->wired_count = 0;
2657 next->start = next->end = split;
2658 next->guard = 0;
2659 next->fspace = VMMAP_FREE_END(orig) - split;
2660 next->aref.ar_amap = NULL;
2661 next->aref.ar_pageoff = 0;
2662 orig->guard = MIN(orig->guard, split - orig->end);
2663 orig->fspace = split - VMMAP_FREE_START(orig);
2664 } else {
2665 orig->fspace = 0;
2666 orig->guard = 0;
2667 orig->end = next->start = split;
2668
2669 if (next->aref.ar_amap) {
2670 amap_splitref(&orig->aref, &next->aref, adj);
2671 }
2672 if (UVM_ET_ISSUBMAP(orig)) {
2673 uvm_map_reference(next->object.sub_map);
2674 next->offset += adj;
2675 } else if (UVM_ET_ISOBJ(orig)) {
2676 if (next->object.uvm_obj->pgops &&
2677 next->object.uvm_obj->pgops->pgo_reference) {
2678 KERNEL_LOCK();
2679 next->object.uvm_obj->pgops->pgo_reference(
2680 next->object.uvm_obj);
2681 KERNEL_UNLOCK();
2682 }
2683 next->offset += adj;
2684 }
2685 }
2686
2687 /*
2688 * Link next into address tree.
2689 * Link orig and next into free-space tree.
2690 *
2691 * Don't insert 'next' into the addr tree until orig has been linked,
2692 * in case the free-list looks at adjacent entries in the addr tree
2693 * for its decisions.
2694 */
2695 if (orig->fspace > 0)
2696 free_before = free;
2697 else
2698 free_before = uvm_map_uaddr_e(map, orig);
2699 uvm_mapent_free_insert(map, free_before, orig);
2700 uvm_mapent_addr_insert(map, next);
2701 uvm_mapent_free_insert(map, free, next);
2702
2703 uvm_tree_sanity(map, __FILE__, __LINE__);
2704 }
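
/*
 * Worked example (illustrative, arbitrary addresses): an entry mapping
 * [0x1000, 0x5000) with no trailing free space, split at 0x3000,
 * becomes orig = [0x1000, 0x3000) and next = [0x3000, 0x5000); next
 * inherits the backing store with its offset (or amap reference)
 * advanced by adj = 0x2000.  If the split address lies in the free
 * space behind the entry instead (split >= orig->end), next becomes an
 * empty entry describing only the free space from split onward.
 */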
2705
2706
2707 #ifdef VMMAP_DEBUG
2708
2709 void
2710 uvm_tree_assert(struct vm_map *map, int test, char *test_str,
2711 char *file, int line)
2712 {
2713 char* map_special;
2714
2715 if (test)
2716 return;
2717
2718 if (map == kernel_map)
2719 map_special = " (kernel_map)";
2720 else if (map == kmem_map)
2721 map_special = " (kmem_map)";
2722 else
2723 map_special = "";
2724 panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file,
2725 line, test_str);
2726 }
2727
2728 /*
2729 * Check that map is sane.
2730 */
2731 void
2732 uvm_tree_sanity(struct vm_map *map, char *file, int line)
2733 {
2734 struct vm_map_entry *iter;
2735 vaddr_t addr;
2736 vaddr_t min, max, bound; /* Bounds checker. */
2737 struct uvm_addr_state *free;
2738
2739 addr = vm_map_min(map);
2740 RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2741 /*
2742 * Valid start, end.
2743 * Catch overflow for end+fspace.
2744 */
2745 UVM_ASSERT(map, iter->end >= iter->start, file, line);
2746 UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line);
2747
2748 /* May not be empty. */
2749 UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter),
2750 file, line);
2751
2752 /* Addresses for entry must lie within map boundaries. */
2753 UVM_ASSERT(map, iter->start >= vm_map_min(map) &&
2754 VMMAP_FREE_END(iter) <= vm_map_max(map), file, line);
2755
2756 /* Tree may not have gaps. */
2757 UVM_ASSERT(map, iter->start == addr, file, line);
2758 addr = VMMAP_FREE_END(iter);
2759
2760 /*
2761 * Free space may not cross boundaries, unless the same
2762 * free list is used on both sides of the border.
2763 */
2764 min = VMMAP_FREE_START(iter);
2765 max = VMMAP_FREE_END(iter);
2766
2767 while (min < max &&
2768 (bound = uvm_map_boundary(map, min, max)) != max) {
2769 UVM_ASSERT(map,
2770 uvm_map_uaddr(map, bound - 1) ==
2771 uvm_map_uaddr(map, bound),
2772 file, line);
2773 min = bound;
2774 }
2775
2776 free = uvm_map_uaddr_e(map, iter);
2777 if (free) {
2778 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0,
2779 file, line);
2780 } else {
2781 UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0,
2782 file, line);
2783 }
2784 }
2785 UVM_ASSERT(map, addr == vm_map_max(map), file, line);
2786 }
2787
2788 void
2789 uvm_tree_size_chk(struct vm_map *map, char *file, int line)
2790 {
2791 struct vm_map_entry *iter;
2792 vsize_t size;
2793
2794 size = 0;
2795 RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2796 if (!UVM_ET_ISHOLE(iter))
2797 size += iter->end - iter->start;
2798 }
2799
2800 if (map->size != size)
2801 printf("map size = 0x%lx, should be 0x%lx\n", map->size, size);
2802 UVM_ASSERT(map, map->size == size, file, line);
2803
2804 vmspace_validate(map);
2805 }
2806
2807 /*
2808 * This function validates the statistics on vmspace.
2809 */
2810 void
2811 vmspace_validate(struct vm_map *map)
2812 {
2813 struct vmspace *vm;
2814 struct vm_map_entry *iter;
2815 vaddr_t imin, imax;
2816 vaddr_t stack_begin, stack_end; /* Position of stack. */
2817 vsize_t stack, heap; /* Measured sizes. */
2818
2819 if (!(map->flags & VM_MAP_ISVMSPACE))
2820 return;
2821
2822 vm = (struct vmspace *)map;
2823 stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2824 stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
2825
2826 stack = heap = 0;
2827 RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
2828 imin = imax = iter->start;
2829
2830 if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL ||
2831 iter->protection != PROT_NONE)
2832 continue;
2833
2834 /*
2835 * Update stack, heap.
2836 * Keep in mind that (theoretically) the entries of
2837 * userspace and stack may be joined.
2838 */
2839 while (imin != iter->end) {
2840 /*
2841 * Set imax to the first boundary crossed between
2842 * imin and stack addresses.
2843 */
2844 imax = iter->end;
2845 if (imin < stack_begin && imax > stack_begin)
2846 imax = stack_begin;
2847 else if (imin < stack_end && imax > stack_end)
2848 imax = stack_end;
2849
2850 if (imin >= stack_begin && imin < stack_end)
2851 stack += imax - imin;
2852 else
2853 heap += imax - imin;
2854 imin = imax;
2855 }
2856 }
2857
2858 heap >>= PAGE_SHIFT;
2859 if (heap != vm->vm_dused) {
2860 printf("vmspace stack range: 0x%lx-0x%lx\n",
2861 stack_begin, stack_end);
2862 panic("vmspace_validate: vmspace.vm_dused invalid, "
2863 "expected %ld pgs, got %d pgs in map %p",
2864 heap, vm->vm_dused,
2865 map);
2866 }
2867 }
2868
2869 #endif /* VMMAP_DEBUG */
2870
2871 /*
2872 * uvm_map_init: init mapping system at boot time. note that we allocate
2873 * and init the static pool of structs vm_map_entry for the kernel here.
2874 */
2875 void
2876 uvm_map_init(void)
2877 {
2878 static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
2879 int lcv;
2880
2881 /* now set up static pool of kernel map entries ... */
2882 mtx_init(&uvm_kmapent_mtx, IPL_VM);
2883 SLIST_INIT(&uvm.kentry_free);
2884 for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
2885 SLIST_INSERT_HEAD(&uvm.kentry_free,
2886 &kernel_map_entry[lcv], daddrs.addr_kentry);
2887 }
2888
2889 /* initialize the map-related pools. */
2890 pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0,
2891 IPL_NONE, PR_WAITOK, "vmsppl", NULL);
2892 pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0,
2893 IPL_VM, PR_WAITOK, "vmmpepl", NULL);
2894 pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0,
2895 IPL_VM, 0, "vmmpekpl", NULL);
2896 pool_sethiwat(&uvm_map_entry_pool, 8192);
2897
2898 uvm_addr_init();
2899 }
2900
2901 #if defined(DDB)
2902
2903 /*
2904 * DDB hooks
2905 */
2906
2907 /*
2908 * uvm_map_printit: actually prints the map
2909 */
2910 void
2911 uvm_map_printit(struct vm_map *map, boolean_t full,
2912 int (*pr)(const char *, ...))
2913 {
2914 struct vmspace *vm;
2915 struct vm_map_entry *entry;
2916 struct uvm_addr_state *free;
2917 int in_free, i;
2918 char buf[8];
2919
2920 (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
2921 (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n",
2922 map->b_start, map->b_end);
2923 (*pr)("\tstack allocate range: 0x%lx-0x%lx\n",
2924 map->s_start, map->s_end);
2925 (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n",
2926 map->size, map->ref_count, map->timestamp,
2927 map->flags);
2928 (*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
2929 pmap_resident_count(map->pmap));
2930
2931 /* struct vmspace handling. */
2932 if (map->flags & VM_MAP_ISVMSPACE) {
2933 vm = (struct vmspace *)map;
2934
2935 (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n",
2936 vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss);
2937 (*pr)("\tvm_tsize=%u vm_dsize=%u\n",
2938 vm->vm_tsize, vm->vm_dsize);
2939 (*pr)("\tvm_taddr=%p vm_daddr=%p\n",
2940 vm->vm_taddr, vm->vm_daddr);
2941 (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n",
2942 vm->vm_maxsaddr, vm->vm_minsaddr);
2943 }
2944
2945 if (!full)
2946 goto print_uaddr;
2947 RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
2948 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
2949 entry, entry->start, entry->end, entry->object.uvm_obj,
2950 (long long)entry->offset, entry->aref.ar_amap,
2951 entry->aref.ar_pageoff);
2952 (*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, "
2953 "syscall=%c, prot(max)=%d/%d, inh=%d, "
2954 "wc=%d, adv=%d\n",
2955 (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
2956 (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
2957 (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
2958 (entry->etype & UVM_ET_STACK) ? 'T' : 'F',
2959 (entry->etype & UVM_ET_SYSCALL) ? 'T' : 'F',
2960 entry->protection, entry->max_protection,
2961 entry->inheritance, entry->wired_count, entry->advice);
2962
2963 free = uvm_map_uaddr_e(map, entry);
2964 in_free = (free != NULL);
2965 (*pr)("\thole=%c, free=%c, guard=0x%lx, "
2966 "free=0x%lx-0x%lx\n",
2967 (entry->etype & UVM_ET_HOLE) ? 'T' : 'F',
2968 in_free ? 'T' : 'F',
2969 entry->guard,
2970 VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
2971 (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment);
2972 (*pr)("\tfreemapped=%c, uaddr=%p\n",
2973 (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free);
2974 if (free) {
2975 (*pr)("\t\t(0x%lx-0x%lx %s)\n",
2976 free->uaddr_minaddr, free->uaddr_maxaddr,
2977 free->uaddr_functions->uaddr_name);
2978 }
2979 }
2980
2981 print_uaddr:
2982 uvm_addr_print(map->uaddr_exe, "exe", full, pr);
2983 for (i = 0; i < nitems(map->uaddr_any); i++) {
2984 snprintf(&buf[0], sizeof(buf), "any[%d]", i);
2985 uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr);
2986 }
2987 uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr);
2988 }
2989
2990 /*
2991 * uvm_object_printit: actually prints the object
2992 */
2993 void
2994 uvm_object_printit(struct uvm_object *uobj, boolean_t full,
2995 int (*pr)(const char *, ...))
2996 {
2997 struct vm_page *pg;
2998 int cnt = 0;
2999
3000 (*pr)("OBJECT %p: pgops=%p, npages=%d, ",
3001 uobj, uobj->pgops, uobj->uo_npages);
3002 if (UVM_OBJ_IS_KERN_OBJECT(uobj))
3003 (*pr)("refs=<SYSTEM>\n");
3004 else
3005 (*pr)("refs=%d\n", uobj->uo_refs);
3006
3007 if (!full) {
3008 return;
3009 }
3010 (*pr)(" PAGES <pg,offset>:\n ");
3011 RBT_FOREACH(pg, uvm_objtree, &uobj->memt) {
3012 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
3013 if ((cnt % 3) == 2) {
3014 (*pr)("\n ");
3015 }
3016 cnt++;
3017 }
3018 if ((cnt % 3) != 2) {
3019 (*pr)("\n");
3020 }
3021 }
3022
3023 /*
3024 * uvm_page_printit: actually print the page
3025 */
3026 static const char page_flagbits[] =
3027 "\2\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
3028 "\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ"
3029 "\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5";
3030
3031 void
3032 uvm_page_printit(struct vm_page *pg, boolean_t full,
3033 int (*pr)(const char *, ...))
3034 {
3035 struct vm_page *tpg;
3036 struct uvm_object *uobj;
3037 struct pglist *pgl;
3038
3039 (*pr)("PAGE %p:\n", pg);
3040 (*pr)(" flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n",
3041 pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count,
3042 (long long)pg->phys_addr);
3043 (*pr)(" uobject=%p, uanon=%p, offset=0x%llx\n",
3044 pg->uobject, pg->uanon, (long long)pg->offset);
3045 #if defined(UVM_PAGE_TRKOWN)
3046 if (pg->pg_flags & PG_BUSY)
3047 (*pr)(" owning thread = %d, tag=%s",
3048 pg->owner, pg->owner_tag);
3049 else
3050 (*pr)(" page not busy, no owner");
3051 #else
3052 (*pr)(" [page ownership tracking disabled]");
3053 #endif
3054 (*pr)("\tvm_page_md %p\n", &pg->mdpage);
3055
3056 if (!full)
3057 return;
3058
3059 /* cross-verify object/anon */
3060 if ((pg->pg_flags & PQ_FREE) == 0) {
3061 if (pg->pg_flags & PQ_ANON) {
3062 if (pg->uanon == NULL || pg->uanon->an_page != pg)
3063 (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n",
3064 (pg->uanon) ? pg->uanon->an_page : NULL);
3065 else
3066 (*pr)(" anon backpointer is OK\n");
3067 } else {
3068 uobj = pg->uobject;
3069 if (uobj) {
3070 (*pr)(" checking object list\n");
3071 RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) {
3072 if (tpg == pg) {
3073 break;
3074 }
3075 }
3076 if (tpg)
3077 (*pr)(" page found on object list\n");
3078 else
3079 (*pr)(" >>> PAGE NOT FOUND "
3080 "ON OBJECT LIST! <<<\n");
3081 }
3082 }
3083 }
3084
3085 /* cross-verify page queue */
3086 if (pg->pg_flags & PQ_FREE) {
3087 if (uvm_pmr_isfree(pg))
3088 (*pr)(" page found in uvm_pmemrange\n");
3089 else
3090 (*pr)(" >>> page not found in uvm_pmemrange <<<\n");
3091 pgl = NULL;
3092 } else if (pg->pg_flags & PQ_INACTIVE) {
3093 pgl = &uvm.page_inactive;
3094 } else if (pg->pg_flags & PQ_ACTIVE) {
3095 pgl = &uvm.page_active;
3096 } else {
3097 pgl = NULL;
3098 }
3099
3100 if (pgl) {
3101 (*pr)(" checking pageq list\n");
3102 TAILQ_FOREACH(tpg, pgl, pageq) {
3103 if (tpg == pg) {
3104 break;
3105 }
3106 }
3107 if (tpg)
3108 (*pr)(" page found on pageq list\n");
3109 else
3110 (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
3111 }
3112 }
3113 #endif
3114
3115 /*
3116 * uvm_map_protect: change map protection
3117 *
3118 * => set_max means set max_protection.
3119 * => map must be unlocked.
3120 */
3121 int
3122 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
3123 vm_prot_t new_prot, int etype, boolean_t set_max, boolean_t checkimmutable)
3124 {
3125 struct vm_map_entry *first, *iter;
3126 vm_prot_t old_prot;
3127 vm_prot_t mask;
3128 vsize_t dused;
3129 int error;
3130
3131 KASSERT((etype & ~UVM_ET_STACK) == 0); /* only UVM_ET_STACK allowed */
3132
3133 if (start > end)
3134 return EINVAL;
3135 start = MAX(start, map->min_offset);
3136 end = MIN(end, map->max_offset);
3137 if (start >= end)
3138 return 0;
3139
3140 dused = 0;
3141 error = 0;
3142 vm_map_lock(map);
3143
3144 /*
3145 * Set up first and last.
3146 * - first will contain first entry at or after start.
3147 */
3148 first = uvm_map_entrybyaddr(&map->addr, start);
3149 KDASSERT(first != NULL);
3150 if (first->end <= start)
3151 first = RBT_NEXT(uvm_map_addr, first);
3152
3153 /* First, check for protection violations. */
3154 for (iter = first; iter != NULL && iter->start < end;
3155 iter = RBT_NEXT(uvm_map_addr, iter)) {
3156 /* Treat memory holes as free space. */
3157 if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3158 continue;
3159
3160 if (checkimmutable &&
3161 (iter->etype & UVM_ET_IMMUTABLE)) {
3162 if (iter->protection == (PROT_READ | PROT_WRITE) &&
3163 new_prot == PROT_READ) {
3164 /* Permit RW to R as a data-locking mechanism */
3165 ;
3166 } else {
3167 error = EPERM;
3168 goto out;
3169 }
3170 }
3171 old_prot = iter->protection;
3172 if (old_prot == PROT_NONE && new_prot != old_prot) {
3173 dused += uvmspace_dused(
3174 map, MAX(start, iter->start), MIN(end, iter->end));
3175 }
3176
3177 if (UVM_ET_ISSUBMAP(iter)) {
3178 error = EINVAL;
3179 goto out;
3180 }
3181 if ((new_prot & iter->max_protection) != new_prot) {
3182 error = EACCES;
3183 goto out;
3184 }
3185 if (map == kernel_map &&
3186 (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
3187 panic("uvm_map_protect: kernel map W^X violation requested");
3188 }
3189
3190 /* Check limits. */
3191 if (dused > 0 && (map->flags & VM_MAP_ISVMSPACE)) {
3192 vsize_t limit = lim_cur(RLIMIT_DATA);
3193 dused = ptoa(dused);
3194 if (limit < dused ||
3195 limit - dused < ptoa(((struct vmspace *)map)->vm_dused)) {
3196 error = ENOMEM;
3197 goto out;
3198 }
3199 }
3200
3201 /* only apply UVM_ET_STACK on a mapping changing to RW */
3202 if (etype && new_prot != (PROT_READ|PROT_WRITE))
3203 etype = 0;
3204
3205 /* Fix protections. */
3206 for (iter = first; iter != NULL && iter->start < end;
3207 iter = RBT_NEXT(uvm_map_addr, iter)) {
3208 /* Treat memory holes as free space. */
3209 if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
3210 continue;
3211
3212 old_prot = iter->protection;
3213
3214 /*
3215 * Skip adapting protection iff old and new protection
3216 * are equal.
3217 */
3218 if (set_max) {
3219 if (old_prot == (new_prot & old_prot) &&
3220 iter->max_protection == new_prot)
3221 continue;
3222 } else {
3223 if (old_prot == new_prot)
3224 continue;
3225 }
3226
3227 UVM_MAP_CLIP_START(map, iter, start);
3228 UVM_MAP_CLIP_END(map, iter, end);
3229
3230 if (set_max) {
3231 iter->max_protection = new_prot;
3232 iter->protection &= new_prot;
3233 } else
3234 iter->protection = new_prot;
3235 iter->etype |= etype; /* potentially add UVM_ET_STACK */
3236
3237 /*
3238 * update physical map if necessary. worry about copy-on-write
3239 * here -- CHECK THIS XXX
3240 */
3241 if (iter->protection != old_prot) {
3242 mask = UVM_ET_ISCOPYONWRITE(iter) ?
3243 ~PROT_WRITE : PROT_MASK;
3244
3245 /* XXX should only wserial++ if no split occurs */
3246 if (iter->protection & PROT_WRITE)
3247 map->wserial++;
3248
3249 if (map->flags & VM_MAP_ISVMSPACE) {
3250 if (old_prot == PROT_NONE) {
3251 ((struct vmspace *)map)->vm_dused +=
3252 uvmspace_dused(map, iter->start,
3253 iter->end);
3254 }
3255 if (iter->protection == PROT_NONE) {
3256 ((struct vmspace *)map)->vm_dused -=
3257 uvmspace_dused(map, iter->start,
3258 iter->end);
3259 }
3260 }
3261
3262 /* update pmap */
3263 if ((iter->protection & mask) == PROT_NONE &&
3264 VM_MAPENT_ISWIRED(iter)) {
3265 /*
3266 * TODO(ariane) this is stupid. wired_count
3267 * is 0 if not wired, otherwise anything
3268 * larger than 0 (incremented once each time
3269 * wire is called).
3270 * Mostly to be able to undo the damage on
3271 * failure. Not the actually be a wired
3272 * refcounter...
3273 * Originally: iter->wired_count--;
3274 * (don't we have to unwire this in the pmap
3275 * as well?)
3276 */
3277 iter->wired_count = 0;
3278 }
3279 uvm_map_lock_entry(iter);
3280 pmap_protect(map->pmap, iter->start, iter->end,
3281 iter->protection & mask);
3282 uvm_map_unlock_entry(iter);
3283 }
3284
3285 /*
3286 * If the map is configured to lock any future mappings,
3287 * wire this entry now if the old protection was PROT_NONE
3288 * and the new protection is not PROT_NONE.
3289 */
3290 if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
3291 VM_MAPENT_ISWIRED(iter) == 0 &&
3292 old_prot == PROT_NONE &&
3293 new_prot != PROT_NONE) {
3294 if (uvm_map_pageable(map, iter->start, iter->end,
3295 FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) {
3296 /*
3297 * If locking the entry fails, remember the
3298 * error if it's the first one. Note we
3299 * still continue setting the protection in
3300 * the map, but it will return the resource
3301 * storage condition regardless.
3302 *
3303 * XXX Ignore what the actual error is,
3304 * XXX just call it a resource shortage
3305 * XXX so that it doesn't get confused
3306 * XXX what uvm_map_protect() itself would
3307 * XXX normally return.
3308 */
3309 error = ENOMEM;
3310 }
3311 }
3312 }
3313 pmap_update(map->pmap);
3314
3315 out:
3316 if (etype & UVM_ET_STACK)
3317 map->sserial++;
3318 vm_map_unlock(map);
3319 return error;
3320 }
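
/*
 * Hypothetical example caller (a sketch; example_lock_data and its
 * arguments are placeholders): drop an immutable read/write region to
 * read-only, which the immutability check above explicitly permits as
 * a data-locking step.
 */
#if 0
static int
example_lock_data(struct vm_map *map, vaddr_t start, vaddr_t end)
{
	/* new_prot, etype, set_max, checkimmutable */
	return uvm_map_protect(map, start, end, PROT_READ, 0, FALSE, TRUE);
}
#endif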
3321
3322 /*
3323 * uvmspace_alloc: allocate a vmspace structure.
3324 *
3325 * - structure includes vm_map and pmap
3326 * - XXX: no locking on this structure
3327 * - refcnt set to 1, rest must be init'd by caller
3328 */
3329 struct vmspace *
3330 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
3331 boolean_t remove_holes)
3332 {
3333 struct vmspace *vm;
3334
3335 vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
3336 uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
3337 return (vm);
3338 }
3339
3340 /*
3341 * uvmspace_init: initialize a vmspace structure.
3342 *
3343 * - XXX: no locking on this structure
3344 * - refcnt set to 1, rest must be init'd by caller
3345 */
3346 void
3347 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
3348 boolean_t pageable, boolean_t remove_holes)
3349 {
3350 KASSERT(pmap == NULL || pmap == pmap_kernel());
3351
3352 if (pmap)
3353 pmap_reference(pmap);
3354 else
3355 pmap = pmap_create();
3356
3357 uvm_map_setup(&vm->vm_map, pmap, min, max,
3358 (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE);
3359
3360 vm->vm_refcnt = 1;
3361
3362 if (remove_holes)
3363 pmap_remove_holes(vm);
3364 }
3365
3366 /*
3367 * uvmspace_share: share a vmspace between two processes
3368 *
3369 * - used for vfork
3370 */
3371
3372 struct vmspace *
3373 uvmspace_share(struct process *pr)
3374 {
3375 struct vmspace *vm = pr->ps_vmspace;
3376
3377 uvmspace_addref(vm);
3378 return vm;
3379 }
3380
3381 /*
3382 * uvmspace_exec: the process wants to exec a new program
3383 *
3384 * - XXX: no locking on vmspace
3385 */
3386
3387 void
3388 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
3389 {
3390 struct process *pr = p->p_p;
3391 struct vmspace *nvm, *ovm = pr->ps_vmspace;
3392 struct vm_map *map = &ovm->vm_map;
3393 struct uvm_map_deadq dead_entries;
3394
3395 KASSERT((start & (vaddr_t)PAGE_MASK) == 0);
3396 KASSERT((end & (vaddr_t)PAGE_MASK) == 0 ||
3397 (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
3398
3399 pmap_unuse_final(p); /* before stack addresses go away */
3400 TAILQ_INIT(&dead_entries);
3401
3402 /* see if more than one process is using this vmspace... */
3403 if (ovm->vm_refcnt == 1) {
3404 /*
3405 * If pr is the only process using its vmspace then
3406 * we can safely recycle that vmspace for the program
3407 * that is being exec'd.
3408 */
3409
3410 #ifdef SYSVSHM
3411 /*
3412 * SYSV SHM semantics require us to kill all segments on an exec
3413 */
3414 if (ovm->vm_shm)
3415 shmexit(ovm);
3416 #endif
3417
3418 /*
3419 * POSIX 1003.1b -- "lock future mappings" is revoked
3420 * when a process execs another program image.
3421 */
3422 vm_map_lock(map);
3423 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE|VM_MAP_SYSCALL_ONCE);
3424
3425 /*
3426 * now unmap the old program
3427 *
3428 * Instead of attempting to keep the map valid, we simply
3429 * nuke all entries and ask uvm_map_setup to reinitialize
3430 * the map to the new boundaries.
3431 *
3432 * uvm_unmap_remove will actually nuke all entries for us
3433 * (as in, not replace them with free-memory entries).
3434 */
3435 uvm_unmap_remove(map, map->min_offset, map->max_offset,
3436 &dead_entries, TRUE, FALSE, FALSE);
3437
3438 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
3439
3440 /* Nuke statistics and boundaries. */
3441 memset(&ovm->vm_startcopy, 0,
3442 (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy);
3443
3444
3445 if (end & (vaddr_t)PAGE_MASK) {
3446 end += 1;
3447 if (end == 0) /* overflow */
3448 end -= PAGE_SIZE;
3449 }
3450
3451 /* Setup new boundaries and populate map with entries. */
3452 map->min_offset = start;
3453 map->max_offset = end;
3454 uvm_map_setup_entries(map);
3455 vm_map_unlock(map);
3456
3457 /* but keep MMU holes unavailable */
3458 pmap_remove_holes(ovm);
3459 } else {
3460 /*
3461 * pr's vmspace is being shared, so we can't reuse
3462 * it for pr since it is still being used for others.
3463 * allocate a new vmspace for pr
3464 */
3465 nvm = uvmspace_alloc(start, end,
3466 (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE);
3467
3468 /* install new vmspace and drop our ref to the old one. */
3469 pmap_deactivate(p);
3470 p->p_vmspace = pr->ps_vmspace = nvm;
3471 pmap_activate(p);
3472
3473 uvmspace_free(ovm);
3474 }
3475 #ifdef PMAP_CHECK_COPYIN
3476 p->p_vmspace->vm_map.check_copyin_count = 0; /* disable checks */
3477 #endif
3478
3479 /* Release dead entries */
3480 uvm_unmap_detach(&dead_entries, 0);
3481 }
3482
3483 /*
3484 * uvmspace_addref: add a reference to a vmspace.
3485 */
3486 void
3487 uvmspace_addref(struct vmspace *vm)
3488 {
3489 KERNEL_ASSERT_LOCKED();
3490 KASSERT(vm->vm_refcnt > 0);
3491
3492 vm->vm_refcnt++;
3493 }
3494
3495 /*
3496 * uvmspace_free: free a vmspace data structure
3497 */
3498 void
3499 uvmspace_free(struct vmspace *vm)
3500 {
3501 KERNEL_ASSERT_LOCKED();
3502
3503 if (--vm->vm_refcnt == 0) {
3504 /*
3505 * lock the map, to wait out all other references to it. delete
3506 * all of the mappings and pages they hold, then call the pmap
3507 * module to reclaim anything left.
3508 */
3509 #ifdef SYSVSHM
3510 /* Get rid of any SYSV shared memory segments. */
3511 if (vm->vm_shm != NULL)
3512 shmexit(vm);
3513 #endif
3514
3515 uvm_map_teardown(&vm->vm_map);
3516 pool_put(&uvm_vmspace_pool, vm);
3517 }
3518 }
3519
3520 /*
3521 * uvm_share: Map the address range [srcaddr, srcaddr + sz) in
3522 * srcmap to the address range [dstaddr, dstaddr + sz) in
3523 * dstmap.
3524 *
3525 * The whole address range in srcmap must be backed by an object
3526 * (no holes).
3527 *
3528 * If successful, the address ranges share memory and the destination
3529 * address range uses the protection flags in prot.
3530 *
3531 * This routine assumes that sz is a multiple of PAGE_SIZE and
3532 * that dstaddr and srcaddr are page-aligned.
3533 */
3534 int
3535 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot,
3536 struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz)
3537 {
3538 int ret = 0;
3539 vaddr_t unmap_end;
3540 vaddr_t dstva;
3541 vsize_t s_off, len, n = sz, remain;
3542 struct vm_map_entry *first = NULL, *last = NULL;
3543 struct vm_map_entry *src_entry, *psrc_entry = NULL;
3544 struct uvm_map_deadq dead;
3545
3546 if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr)
3547 return EINVAL;
3548
3549 TAILQ_INIT(&dead);
3550 vm_map_lock(dstmap);
3551 vm_map_lock_read(srcmap);
3552
3553 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) {
3554 ret = ENOMEM;
3555 goto exit_unlock;
3556 }
3557 if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) {
3558 ret = EINVAL;
3559 goto exit_unlock;
3560 }
3561
3562 dstva = dstaddr;
3563 unmap_end = dstaddr;
3564 for (; src_entry != NULL;
3565 psrc_entry = src_entry,
3566 src_entry = RBT_NEXT(uvm_map_addr, src_entry)) {
3567 /* hole in address space, bail out */
3568 if (psrc_entry != NULL && psrc_entry->end != src_entry->start)
3569 break;
3570 if (src_entry->start >= srcaddr + sz)
3571 break;
3572
3573 if (UVM_ET_ISSUBMAP(src_entry))
3574 panic("uvm_share: encountered a submap (illegal)");
3575 if (!UVM_ET_ISCOPYONWRITE(src_entry) &&
3576 UVM_ET_ISNEEDSCOPY(src_entry))
3577 panic("uvm_share: non-copy_on_write map entries "
3578 "marked needs_copy (illegal)");
3579
3580 /*
3581 * If srcaddr > the map entry's start, we are in the middle of an
3582 * entry, so we calculate the offset to use in the source map.
3583 */
3584 if (srcaddr > src_entry->start)
3585 s_off = srcaddr - src_entry->start;
3586 else if (srcaddr == src_entry->start)
3587 s_off = 0;
3588 else
3589 panic("uvm_share: map entry start > srcaddr");
3590
3591 remain = src_entry->end - src_entry->start - s_off;
3592
3593 /* Determine how many bytes to share in this pass */
3594 if (n < remain)
3595 len = n;
3596 else
3597 len = remain;
3598
3599 if (uvm_mapent_share(dstmap, dstva, len, s_off, prot, prot,
3600 srcmap, src_entry, &dead) == NULL)
3601 break;
3602
3603 n -= len;
3604 dstva += len;
3605 srcaddr += len;
3606 unmap_end = dstva + len;
3607 if (n == 0)
3608 goto exit_unlock;
3609 }
3610
3611 ret = EINVAL;
3612 uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE, FALSE);
3613
3614 exit_unlock:
3615 vm_map_unlock_read(srcmap);
3616 vm_map_unlock(dstmap);
3617 uvm_unmap_detach(&dead, 0);
3618
3619 return ret;
3620 }
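
/*
 * Hypothetical example caller (a sketch; example_share_ro and its
 * arguments are placeholders): expose a fully-backed, page-aligned
 * region of srcmap read-only at dstva in dstmap.
 */
#if 0
static int
example_share_ro(struct vm_map *dstmap, vaddr_t dstva,
    struct vm_map *srcmap, vaddr_t srcva, vsize_t len)
{
	/* len must be a multiple of PAGE_SIZE, both addresses aligned. */
	return uvm_share(dstmap, dstva, PROT_READ, srcmap, srcva, len);
}
#endif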
3621
3622 /*
3623 * Clone map entry into other map.
3624 *
3625 * Mapping will be placed at dstaddr, for the same length.
3626 * Space must be available.
3627 * Reference counters are incremented.
3628 */
3629 struct vm_map_entry *
3630 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3631 vsize_t off, vm_prot_t prot, vm_prot_t maxprot,
3632 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
3633 int mapent_flags, int amap_share_flags)
3634 {
3635 struct vm_map_entry *new_entry, *first, *last;
3636
3637 KDASSERT(!UVM_ET_ISSUBMAP(old_entry));
3638
3639 /* Create new entry (linked in on creation). Fill in first, last. */
3640 first = last = NULL;
3641 if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) {
3642 panic("uvm_mapent_clone: no space in map for "
3643 "entry in empty map");
3644 }
3645 new_entry = uvm_map_mkentry(dstmap, first, last,
3646 dstaddr, dstlen, mapent_flags, dead, NULL);
3647 if (new_entry == NULL)
3648 return NULL;
3649 /* old_entry -> new_entry */
3650 new_entry->object = old_entry->object;
3651 new_entry->offset = old_entry->offset;
3652 new_entry->aref = old_entry->aref;
3653 new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED;
3654 new_entry->protection = prot;
3655 new_entry->max_protection = maxprot;
3656 new_entry->inheritance = old_entry->inheritance;
3657 new_entry->advice = old_entry->advice;
3658
3659 /* gain reference to object backing the map (can't be a submap). */
3660 if (new_entry->aref.ar_amap) {
3661 new_entry->aref.ar_pageoff += off >> PAGE_SHIFT;
3662 amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3663 (new_entry->end - new_entry->start) >> PAGE_SHIFT,
3664 amap_share_flags);
3665 }
3666
3667 if (UVM_ET_ISOBJ(new_entry) &&
3668 new_entry->object.uvm_obj->pgops->pgo_reference) {
3669 new_entry->offset += off;
3670 new_entry->object.uvm_obj->pgops->pgo_reference
3671 (new_entry->object.uvm_obj);
3672 }
3673
3674 return new_entry;
3675 }
3676
3677 struct vm_map_entry *
3678 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
3679 vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map,
3680 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3681 {
3682 /*
3683 * If old_entry refers to a copy-on-write region that has not yet been
3684 * written to (needs_copy flag is set), then we need to allocate a new
3685 * amap for old_entry.
3686 *
3687 * If we do not do this, and the process owning old_entry does a copy-on
3688 * write later, old_entry and new_entry will refer to different memory
3689 * regions, and the memory between the processes is no longer shared.
3690 *
3691 * [in other words, we need to clear needs_copy]
3692 */
3693
3694 if (UVM_ET_ISNEEDSCOPY(old_entry)) {
3695 /* get our own amap, clears needs_copy */
3696 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 0, 0);
3697 /* XXXCDC: WAITOK??? */
3698 }
3699
3700 return uvm_mapent_clone(dstmap, dstaddr, dstlen, off,
3701 prot, maxprot, old_entry, dead, 0, AMAP_SHARED);
3702 }
3703
3704 /*
3705 * share the mapping: this means we want the old and
3706 * new entries to share amaps and backing objects.
3707 */
3708 struct vm_map_entry *
3709 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
3710 struct vm_map *old_map,
3711 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3712 {
3713 struct vm_map_entry *new_entry;
3714
3715 new_entry = uvm_mapent_share(new_map, old_entry->start,
3716 old_entry->end - old_entry->start, 0, old_entry->protection,
3717 old_entry->max_protection, old_map, old_entry, dead);
3718
3719 /*
3720 * pmap_copy the mappings: this routine is optional
3721 * but if it is there it will reduce the number of
3722 * page faults in the new proc.
3723 */
3724 if (!UVM_ET_ISHOLE(new_entry))
3725 pmap_copy(new_map->pmap, old_map->pmap, new_entry->start,
3726 (new_entry->end - new_entry->start), new_entry->start);
3727
3728 return (new_entry);
3729 }
3730
3731 /*
3732 * copy-on-write the mapping (using mmap's
3733 * MAP_PRIVATE semantics)
3734 *
3735 * allocate new_entry, adjust reference counts.
3736 * (note that new references are read-only).
3737 */
3738 struct vm_map_entry *
3739 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map,
3740 struct vm_map *old_map,
3741 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3742 {
3743 struct vm_map_entry *new_entry;
3744 boolean_t protect_child;
3745
3746 new_entry = uvm_mapent_clone(new_map, old_entry->start,
3747 old_entry->end - old_entry->start, 0, old_entry->protection,
3748 old_entry->max_protection, old_entry, dead, 0, 0);
3749
3750 new_entry->etype |=
3751 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3752
3753 /*
3754 * the new entry will need an amap. it will either
3755 * need to be copied from the old entry or created
3756 * from scratch (if the old entry does not have an
3757 * amap). can we defer this process until later
3758 * (by setting "needs_copy") or do we need to copy
3759 * the amap now?
3760 *
3761 * we must copy the amap now if any of the following
3762 * conditions hold:
3763 * 1. the old entry has an amap and that amap is
3764 * being shared. this means that the old (parent)
3765 * process is sharing the amap with another
3766 * process. if we do not clear needs_copy here
3767 * we will end up in a situation where both the
3768 * parent and child process are referring to the
3769 * same amap with "needs_copy" set. if the
3770 * parent write-faults, the fault routine will
3771 * clear "needs_copy" in the parent by allocating
3772 * a new amap. this is wrong because the
3773 * parent is supposed to be sharing the old amap
3774 * and the new amap will break that.
3775 *
3776 * 2. if the old entry has an amap and a non-zero
3777 * wire count then we are going to have to call
3778 * amap_cow_now to avoid page faults in the
3779 * parent process. since amap_cow_now requires
3780 * "needs_copy" to be clear we might as well
3781 * clear it here as well.
3782 *
3783 */
3784 if (old_entry->aref.ar_amap != NULL &&
3785 ((amap_flags(old_entry->aref.ar_amap) &
3786 AMAP_SHARED) != 0 ||
3787 VM_MAPENT_ISWIRED(old_entry))) {
3788 amap_copy(new_map, new_entry, M_WAITOK, FALSE,
3789 0, 0);
3790 /* XXXCDC: M_WAITOK ... ok? */
3791 }
3792
3793 /*
3794 * if the parent's entry is wired down, then the
3795 * parent process does not want page faults on
3796 * access to that memory. this means that we
3797 * cannot do copy-on-write because we can't write
3798 * protect the old entry. in this case we
3799 * resolve all copy-on-write faults now, using
3800 * amap_cow_now. note that we have already
3801 * allocated any needed amap (above).
3802 */
3803 if (VM_MAPENT_ISWIRED(old_entry)) {
3804 /*
3805 * resolve all copy-on-write faults now
3806 * (note that there is nothing to do if
3807 * the old mapping does not have an amap).
3808 * XXX: is it worthwhile to bother with
3809 * pmap_copy in this case?
3810 */
3811 if (old_entry->aref.ar_amap)
3812 amap_cow_now(new_map, new_entry);
3813 } else {
3814 if (old_entry->aref.ar_amap) {
3815 /*
3816 * setup mappings to trigger copy-on-write faults
3817 * we must write-protect the parent if it has
3818 * an amap and it is not already "needs_copy"...
3819 * if it is already "needs_copy" then the parent
3820 * has already been write-protected by a previous
3821 * fork operation.
3822 *
3823 * if we do not write-protect the parent, then
3824 * we must be sure to write-protect the child
3825 * after the pmap_copy() operation.
3826 *
3827 * XXX: pmap_copy should have some way of telling
3828 * us that it didn't do anything so we can avoid
3829 * calling pmap_protect needlessly.
3830 */
3831 if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
3832 if (old_entry->max_protection & PROT_WRITE) {
3833 uvm_map_lock_entry(old_entry);
3834 pmap_protect(old_map->pmap,
3835 old_entry->start,
3836 old_entry->end,
3837 old_entry->protection &
3838 ~PROT_WRITE);
3839 uvm_map_unlock_entry(old_entry);
3840 pmap_update(old_map->pmap);
3841 }
3842 old_entry->etype |= UVM_ET_NEEDSCOPY;
3843 }
3844
3845 /* parent must now be write-protected */
3846 protect_child = FALSE;
3847 } else {
3848 /*
3849 * we only need to protect the child if the
3850 * parent has write access.
3851 */
3852 if (old_entry->max_protection & PROT_WRITE)
3853 protect_child = TRUE;
3854 else
3855 protect_child = FALSE;
3856 }
3857 /*
3858 * copy the mappings
3859 * XXX: need a way to tell if this does anything
3860 */
3861 if (!UVM_ET_ISHOLE(new_entry))
3862 pmap_copy(new_map->pmap, old_map->pmap,
3863 new_entry->start,
3864 (old_entry->end - old_entry->start),
3865 old_entry->start);
3866
3867 /* protect the child's mappings if necessary */
3868 if (protect_child) {
3869 pmap_protect(new_map->pmap, new_entry->start,
3870 new_entry->end,
3871 new_entry->protection &
3872 ~PROT_WRITE);
3873 }
3874 }
3875
3876 return (new_entry);
3877 }
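/*
 * Editor's note -- illustration only, not part of uvm_map.c.  The
 * decision made above, restated as a tiny self-contained model: a
 * MAP_INHERIT_COPY entry gets its amap copied at fork time only if it
 * has one and that amap is shared or the entry is wired; a wired entry
 * additionally has all COW faults resolved right away via
 * amap_cow_now().  Everything else defers the copy to the first write
 * fault through "needs_copy".  The enum and helper below are
 * hypothetical names used only for this sketch.
 */
enum fork_amap_action {
	FORK_AMAP_DEFER,	/* leave needs_copy set, copy on fault */
	FORK_AMAP_COPY_NOW,	/* amap_copy() during fork */
	FORK_AMAP_RESOLVE_COW	/* amap_copy() + amap_cow_now() */
};

static enum fork_amap_action
fork_amap_decision(int has_amap, int amap_shared, int entry_wired)
{
	if (has_amap && entry_wired)
		return FORK_AMAP_RESOLVE_COW;
	if (has_amap && amap_shared)
		return FORK_AMAP_COPY_NOW;
	return FORK_AMAP_DEFER;
}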
3878
3879 /*
3880 * zero the mapping: the new entry will be zero initialized
3881 */
3882 struct vm_map_entry *
3883 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map,
3884 struct vm_map *old_map,
3885 struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
3886 {
3887 struct vm_map_entry *new_entry;
3888
3889 new_entry = uvm_mapent_clone(new_map, old_entry->start,
3890 old_entry->end - old_entry->start, 0, old_entry->protection,
3891 old_entry->max_protection, old_entry, dead, 0, 0);
3892
3893 new_entry->etype |=
3894 (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
3895
3896 if (new_entry->aref.ar_amap) {
3897 amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
3898 atop(new_entry->end - new_entry->start), 0);
3899 new_entry->aref.ar_amap = NULL;
3900 new_entry->aref.ar_pageoff = 0;
3901 }
3902
3903 if (UVM_ET_ISOBJ(new_entry)) {
3904 if (new_entry->object.uvm_obj->pgops->pgo_detach)
3905 new_entry->object.uvm_obj->pgops->pgo_detach(
3906 new_entry->object.uvm_obj);
3907 new_entry->object.uvm_obj = NULL;
3908 new_entry->etype &= ~UVM_ET_OBJ;
3909 }
3910
3911 return (new_entry);
3912 }
3913
3914 /*
3915 * uvmspace_fork: fork a process' main map
3916 *
3917 * => create a new vmspace for child process from parent.
3918 * => parent's map must not be locked.
3919 */
3920 struct vmspace *
3921 uvmspace_fork(struct process *pr)
3922 {
3923 struct vmspace *vm1 = pr->ps_vmspace;
3924 struct vmspace *vm2;
3925 struct vm_map *old_map = &vm1->vm_map;
3926 struct vm_map *new_map;
3927 struct vm_map_entry *old_entry, *new_entry;
3928 struct uvm_map_deadq dead;
3929
3930 vm_map_lock(old_map);
3931
3932 vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
3933 (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE);
3934 memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
3935 (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
3936 vm2->vm_dused = 0; /* Statistic managed by us. */
3937 new_map = &vm2->vm_map;
3938 vm_map_lock(new_map);
3939
3940 /* go entry-by-entry */
3941 TAILQ_INIT(&dead);
3942 RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) {
3943 if (old_entry->start == old_entry->end)
3944 continue;
3945
3946 /* first, some sanity checks on the old entry */
3947 if (UVM_ET_ISSUBMAP(old_entry)) {
3948 panic("fork: encountered a submap during fork "
3949 "(illegal)");
3950 }
3951
3952 if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
3953 UVM_ET_ISNEEDSCOPY(old_entry)) {
3954 panic("fork: non-copy_on_write map entry marked "
3955 "needs_copy (illegal)");
3956 }
3957
3958 /* Apply inheritance. */
3959 switch (old_entry->inheritance) {
3960 case MAP_INHERIT_SHARE:
3961 new_entry = uvm_mapent_forkshared(vm2, new_map,
3962 old_map, old_entry, &dead);
3963 break;
3964 case MAP_INHERIT_COPY:
3965 new_entry = uvm_mapent_forkcopy(vm2, new_map,
3966 old_map, old_entry, &dead);
3967 break;
3968 case MAP_INHERIT_ZERO:
3969 new_entry = uvm_mapent_forkzero(vm2, new_map,
3970 old_map, old_entry, &dead);
3971 break;
3972 default:
3973 continue;
3974 }
3975
3976 /* Update process statistics. */
3977 if (!UVM_ET_ISHOLE(new_entry))
3978 new_map->size += new_entry->end - new_entry->start;
3979 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry) &&
3980 new_entry->protection != PROT_NONE) {
3981 vm2->vm_dused += uvmspace_dused(
3982 new_map, new_entry->start, new_entry->end);
3983 }
3984 }
3985
3986 vm_map_unlock(old_map);
3987 vm_map_unlock(new_map);
3988
3989 /*
3990 * This can actually happen, if multiple entries described a
3991 * space in which an entry was inherited.
3992 */
3993 uvm_unmap_detach(&dead, 0);
3994
3995 #ifdef SYSVSHM
3996 if (vm1->vm_shm)
3997 shmfork(vm1, vm2);
3998 #endif
3999
4000 return vm2;
4001 }
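/*
 * Editor's sketch -- not part of uvm_map.c.  The inheritance codes
 * dispatched above are set from userland with minherit(2).  A minimal
 * example marking one page MAP_INHERIT_ZERO, so fork() takes the
 * uvm_mapent_forkzero() path and the child sees zero-filled memory
 * instead of a copy.  The 4096-byte length assumes one 4K page.
 */
#include <sys/mman.h>
#include <sys/wait.h>

#include <err.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	size_t len = 4096;		/* assume one page */
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);

	if (p == MAP_FAILED)
		err(1, "mmap");
	strlcpy(p, "parent data", len);

	/* child gets zero-filled pages for this range instead of a copy */
	if (minherit(p, len, MAP_INHERIT_ZERO) == -1)
		err(1, "minherit");

	switch (fork()) {
	case -1:
		err(1, "fork");
	case 0:
		printf("child sees: \"%s\"\n", p);	/* expect empty */
		_exit(0);
	default:
		wait(NULL);
		printf("parent sees: \"%s\"\n", p);
	}
	return 0;
}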
4002
4003 /*
4004 * uvm_map_hint: return the beginning of the best area suitable for
4005 * creating a new mapping with "prot" protection.
4006 */
4007 vaddr_t
4008 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr,
4009 vaddr_t maxaddr)
4010 {
4011 vaddr_t addr;
4012 vaddr_t spacing;
4013
4014 #ifdef __i386__
4015 /*
4016 * If executable skip first two pages, otherwise start
4017 * after data + heap region.
4018 */
4019 if ((prot & PROT_EXEC) != 0 &&
4020 (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) {
4021 addr = (PAGE_SIZE*2) +
4022 (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
4023 return (round_page(addr));
4024 }
4025 #endif
4026
4027 #if defined (__LP64__)
4028 spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1;
4029 #else
4030 spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1;
4031 #endif
4032
4033 /*
4034 * Start malloc/mmap after the brk.
4035 */
4036 addr = (vaddr_t)vm->vm_daddr + BRKSIZ;
4037 addr = MAX(addr, minaddr);
4038
4039 if (addr < maxaddr) {
4040 while (spacing > maxaddr - addr)
4041 spacing >>= 1;
4042 }
4043 addr += arc4random() & spacing;
4044 return (round_page(addr));
4045 }
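/*
 * Editor's sketch -- illustration only, not part of uvm_map.c.  A
 * self-contained userland model of the non-i386 path above, assuming
 * an LP64 platform and 4K pages; the daddr/brksiz/maxaddr values are
 * made-up stand-ins for vm_daddr, BRKSIZ and the map bounds, which are
 * machine-dependent in the real code.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define PAGE_MASK_DEMO	0xfffUL				/* assume 4K pages */
#define round_page_demo(x) (((x) + PAGE_MASK_DEMO) & ~PAGE_MASK_DEMO)

int
main(void)
{
	uintptr_t daddr = 0x1000000UL;		/* stand-in for vm_daddr */
	uintptr_t brksiz = 0x10000000UL;	/* stand-in for BRKSIZ */
	uintptr_t minaddr = 0x1000UL;
	uintptr_t maxaddr = 0x7f7fffff000UL;	/* stand-in upper bound */
	uintptr_t spacing = (4UL << 30) - 1;	/* MIN(4GB, MAXDSIZ) - 1 */
	uintptr_t addr;

	addr = daddr + brksiz;			/* start past the brk area */
	if (addr < minaddr)
		addr = minaddr;
	if (addr < maxaddr)
		while (spacing > maxaddr - addr)
			spacing >>= 1;
	addr += arc4random() & spacing;		/* randomize within spacing */
	printf("hint: 0x%lx\n", (unsigned long)round_page_demo(addr));
	return 0;
}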
4046
4047 /*
4048 * uvm_map_submap: punch down part of a map into a submap
4049 *
4050 * => only the kernel_map is allowed to be submapped
4051 * => the purpose of submapping is to break up the locking granularity
4052 * of a larger map
4053 * => the range specified must have been mapped previously with a uvm_map()
4054 * call [with uobj==NULL] to create a blank map entry in the main map.
4055 * [And it had better still be blank!]
4056 * => maps which contain submaps should never be copied or forked.
4057 * => to remove a submap, use uvm_unmap() on the main map
4058 * and then uvm_map_deallocate() the submap.
4059 * => main map must be unlocked.
4060 * => submap must have been init'd and have a zero reference count.
4061 * [need not be locked as we don't actually reference it]
4062 */
4063 int
4064 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
4065 struct vm_map *submap)
4066 {
4067 struct vm_map_entry *entry;
4068 int result;
4069
4070 if (start > map->max_offset || end > map->max_offset ||
4071 start < map->min_offset || end < map->min_offset)
4072 return EINVAL;
4073
4074 vm_map_lock(map);
4075
4076 if (uvm_map_lookup_entry(map, start, &entry)) {
4077 UVM_MAP_CLIP_START(map, entry, start);
4078 UVM_MAP_CLIP_END(map, entry, end);
4079 } else
4080 entry = NULL;
4081
4082 if (entry != NULL &&
4083 entry->start == start && entry->end == end &&
4084 entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
4085 !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
4086 entry->etype |= UVM_ET_SUBMAP;
4087 entry->object.sub_map = submap;
4088 entry->offset = 0;
4089 uvm_map_reference(submap);
4090 result = 0;
4091 } else
4092 result = EINVAL;
4093
4094 vm_map_unlock(map);
4095 return result;
4096 }
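/*
 * Editor's sketch -- not part of uvm_map.c and not compilable on its
 * own (kernel context).  It strings together the sequence the block
 * comment above describes, using only functions visible in this file;
 * real callers normally go through a wrapper such as uvm_km_suballoc(9),
 * and step 1 (the blank uvm_map() reservation) is only described, not
 * shown, to avoid guessing at prototypes.
 */
static struct vm_map *
example_make_submap(vaddr_t start, vaddr_t end)
{
	struct vm_map *submap;

	/*
	 * Step 1 (not shown): reserve the blank range [start, end) in
	 * kernel_map with uvm_map() and uobj == NULL, per the comment
	 * above.
	 */

	/* Step 2: create the submap and attach it to kernel_map. */
	submap = uvm_map_create(pmap_kernel(), start, end, VM_MAP_PAGEABLE);
	if (uvm_map_submap(kernel_map, start, end, submap) != 0)
		panic("example_make_submap: attach failed");

	/*
	 * Teardown later: uvm_unmap(kernel_map, start, end), then
	 * uvm_map_deallocate(submap).
	 */
	return submap;
}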
4097
4098 /*
4099 * uvm_map_checkprot: check protection in map
4100 *
4101 * => must allow specific protection in a fully allocated region.
4102 * => map must be read or write locked by caller.
4103 */
4104 boolean_t
4105 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
4106 vm_prot_t protection)
4107 {
4108 struct vm_map_entry *entry;
4109
4110 vm_map_assert_anylock(map);
4111
4112 if (start < map->min_offset || end > map->max_offset || start > end)
4113 return FALSE;
4114 if (start == end)
4115 return TRUE;
4116
4117 /*
4118 * Iterate entries.
4119 */
4120 for (entry = uvm_map_entrybyaddr(&map->addr, start);
4121 entry != NULL && entry->start < end;
4122 entry = RBT_NEXT(uvm_map_addr, entry)) {
4123 /* Fail if a hole is found. */
4124 if (UVM_ET_ISHOLE(entry) ||
4125 (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
4126 return FALSE;
4127
4128 /* Check protection. */
4129 if ((entry->protection & protection) != protection)
4130 return FALSE;
4131 }
4132 return TRUE;
4133 }
4134
4135 /*
4136 * uvm_map_create: create map
4137 */
4138 vm_map_t
4139 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
4140 {
4141 vm_map_t map;
4142
4143 map = malloc(sizeof *map, M_VMMAP, M_WAITOK);
4144 uvm_map_setup(map, pmap, min, max, flags);
4145 return (map);
4146 }
4147
4148 /*
4149 * uvm_map_deallocate: drop reference to a map
4150 *
4151 * => caller must not lock map
4152 * => we will zap map if ref count goes to zero
4153 */
4154 void
4155 uvm_map_deallocate(vm_map_t map)
4156 {
4157 int c;
4158 struct uvm_map_deadq dead;
4159
4160 c = atomic_dec_int_nv(&map->ref_count);
4161 if (c > 0) {
4162 return;
4163 }
4164
4165 /*
4166 * all references gone. unmap and free.
4167 *
4168 * No lock required: we are only one to access this map.
4169 */
4170 TAILQ_INIT(&dead);
4171 uvm_tree_sanity(map, __FILE__, __LINE__);
4172 vm_map_lock(map);
4173 uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
4174 TRUE, FALSE, FALSE);
4175 vm_map_unlock(map);
4176 pmap_destroy(map->pmap);
4177 KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
4178 free(map, M_VMMAP, sizeof *map);
4179
4180 uvm_unmap_detach(&dead, 0);
4181 }
4182
4183 /*
4184 * uvm_map_inherit: set inheritance code for range of addrs in map.
4185 *
4186 * => map must be unlocked
4187 * => note that the inherit code is used during a "fork". see fork
4188 * code for details.
4189 */
4190 int
4191 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
4192 vm_inherit_t new_inheritance)
4193 {
4194 struct vm_map_entry *entry;
4195
4196 switch (new_inheritance) {
4197 case MAP_INHERIT_NONE:
4198 case MAP_INHERIT_COPY:
4199 case MAP_INHERIT_SHARE:
4200 case MAP_INHERIT_ZERO:
4201 break;
4202 default:
4203 return (EINVAL);
4204 }
4205
4206 if (start > end)
4207 return EINVAL;
4208 start = MAX(start, map->min_offset);
4209 end = MIN(end, map->max_offset);
4210 if (start >= end)
4211 return 0;
4212
4213 vm_map_lock(map);
4214
4215 entry = uvm_map_entrybyaddr(&map->addr, start);
4216 if (entry->end > start)
4217 UVM_MAP_CLIP_START(map, entry, start);
4218 else
4219 entry = RBT_NEXT(uvm_map_addr, entry);
4220
4221 while (entry != NULL && entry->start < end) {
4222 UVM_MAP_CLIP_END(map, entry, end);
4223 entry->inheritance = new_inheritance;
4224 entry = RBT_NEXT(uvm_map_addr, entry);
4225 }
4226
4227 vm_map_unlock(map);
4228 return (0);
4229 }
4230
4231 #ifdef PMAP_CHECK_COPYIN
4232 static void inline
4233 check_copyin_add(struct vm_map *map, vaddr_t start, vaddr_t end)
4234 {
4235 if (PMAP_CHECK_COPYIN == 0 ||
4236 map->check_copyin_count >= UVM_MAP_CHECK_COPYIN_MAX)
4237 return;
4238 map->check_copyin[map->check_copyin_count].start = start;
4239 map->check_copyin[map->check_copyin_count].end = end;
4240 membar_producer();
4241 map->check_copyin_count++;
4242 }
4243
4244 /*
4245 * uvm_map_check_copyin_add: remember regions which are X-only for copyin(),
4246 * copyinstr(), uiomove(), and others
4247 *
4248 * => map must be unlocked
4249 */
4250 int
4251 uvm_map_check_copyin_add(struct vm_map *map, vaddr_t start, vaddr_t end)
4252 {
4253 if (start > end)
4254 return EINVAL;
4255 start = MAX(start, map->min_offset);
4256 end = MIN(end, map->max_offset);
4257 if (start >= end)
4258 return 0;
4259 check_copyin_add(map, start, end);
4260 return (0);
4261 }
4262 #endif /* PMAP_CHECK_COPYIN */
4263
4264 /*
4265 * uvm_map_syscall: permit system calls for range of addrs in map.
4266 *
4267 * => map must be unlocked
4268 */
4269 int
4270 uvm_map_syscall(struct vm_map *map, vaddr_t start, vaddr_t end)
4271 {
4272 struct vm_map_entry *entry;
4273
4274 if (start > end)
4275 return EINVAL;
4276 start = MAX(start, map->min_offset);
4277 end = MIN(end, map->max_offset);
4278 if (start >= end)
4279 return 0;
4280 if (map->flags & VM_MAP_SYSCALL_ONCE) /* only allowed once */
4281 return (EPERM);
4282
4283 vm_map_lock(map);
4284
4285 entry = uvm_map_entrybyaddr(&map->addr, start);
4286 if (entry->end > start)
4287 UVM_MAP_CLIP_START(map, entry, start);
4288 else
4289 entry = RBT_NEXT(uvm_map_addr, entry);
4290
4291 while (entry != NULL && entry->start < end) {
4292 UVM_MAP_CLIP_END(map, entry, end);
4293 entry->etype |= UVM_ET_SYSCALL;
4294 entry = RBT_NEXT(uvm_map_addr, entry);
4295 }
4296
4297 #ifdef PMAP_CHECK_COPYIN
4298 check_copyin_add(map, start, end); /* Add libc's text segment */
4299 #endif
4300 map->wserial++;
4301 map->flags |= VM_MAP_SYSCALL_ONCE;
4302 vm_map_unlock(map);
4303 return (0);
4304 }
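/*
 * Editor's sketch -- not part of uvm_map.c, and hedged: as I read it,
 * uvm_map_syscall() backs the msyscall(2) system call, which ld.so(1)
 * issues once at startup to mark libc's text as the only region allowed
 * to enter the kernel; VM_MAP_SYSCALL_ONCE above makes any later attempt
 * fail with EPERM.  Ordinary programs never call it themselves.  The
 * fragment assumes the prototype int msyscall(void *, size_t) and the
 * hypothetical libc_text_start/libc_text_len variables.
 */
	if (msyscall(libc_text_start, libc_text_len) == -1)
		err(1, "msyscall");	/* e.g. EPERM on a second call */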
4305
4306 /*
4307 * uvm_map_immutable: block mapping/mprotect for range of addrs in map.
4308 *
4309 * => map must be unlocked
4310 */
4311 int
4312 uvm_map_immutable(struct vm_map *map, vaddr_t start, vaddr_t end, int imut)
4313 {
4314 struct vm_map_entry *entry;
4315
4316 if (start > end)
4317 return EINVAL;
4318 start = MAX(start, map->min_offset);
4319 end = MIN(end, map->max_offset);
4320 if (start >= end)
4321 return 0;
4322
4323 vm_map_lock(map);
4324
4325 entry = uvm_map_entrybyaddr(&map->addr, start);
4326 if (entry->end > start)
4327 UVM_MAP_CLIP_START(map, entry, start);
4328 else
4329 entry = RBT_NEXT(uvm_map_addr, entry);
4330
4331 while (entry != NULL && entry->start < end) {
4332 UVM_MAP_CLIP_END(map, entry, end);
4333 if (imut)
4334 entry->etype |= UVM_ET_IMMUTABLE;
4335 else
4336 entry->etype &= ~UVM_ET_IMMUTABLE;
4337 entry = RBT_NEXT(uvm_map_addr, entry);
4338 }
4339
4340 map->wserial++;
4341 vm_map_unlock(map);
4342 return (0);
4343 }
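/*
 * Editor's sketch -- not part of uvm_map.c.  uvm_map_immutable() with
 * imut=1 is what mimmutable(2) ends up invoking.  Minimal userland
 * example; once the range is immutable, later mprotect(2) or munmap(2)
 * attempts on it fail with EPERM.  The 4096-byte length assumes one
 * 4K page.
 */
#include <sys/mman.h>

#include <err.h>

int
main(void)
{
	size_t len = 4096;		/* assume one page */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);

	if (p == MAP_FAILED)
		err(1, "mmap");
	if (mimmutable(p, len) == -1)
		err(1, "mimmutable");
	/* expected to fail now that the mapping is immutable */
	if (mprotect(p, len, PROT_READ) == -1)
		warn("mprotect after mimmutable (expected)");
	return 0;
}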
4344
4345 /*
4346 * uvm_map_advice: set advice code for range of addrs in map.
4347 *
4348 * => map must be unlocked
4349 */
4350 int
4351 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
4352 {
4353 struct vm_map_entry *entry;
4354
4355 switch (new_advice) {
4356 case MADV_NORMAL:
4357 case MADV_RANDOM:
4358 case MADV_SEQUENTIAL:
4359 break;
4360 default:
4361 return (EINVAL);
4362 }
4363
4364 if (start > end)
4365 return EINVAL;
4366 start = MAX(start, map->min_offset);
4367 end = MIN(end, map->max_offset);
4368 if (start >= end)
4369 return 0;
4370
4371 vm_map_lock(map);
4372
4373 entry = uvm_map_entrybyaddr(&map->addr, start);
4374 if (entry != NULL && entry->end > start)
4375 UVM_MAP_CLIP_START(map, entry, start);
4376 	else if (entry != NULL)
4377 entry = RBT_NEXT(uvm_map_addr, entry);
4378
4379 /*
4380 * XXXJRT: disallow holes?
4381 */
4382 while (entry != NULL && entry->start < end) {
4383 UVM_MAP_CLIP_END(map, entry, end);
4384 entry->advice = new_advice;
4385 entry = RBT_NEXT(uvm_map_addr, entry);
4386 }
4387
4388 vm_map_unlock(map);
4389 return (0);
4390 }
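/*
 * Editor's sketch -- not part of uvm_map.c.  uvm_map_advice() is
 * reached from madvise(2) for the three advice codes accepted above.
 * Minimal userland example; the page count is arbitrary.
 */
#include <sys/mman.h>

#include <err.h>

int
main(void)
{
	size_t len = 64 * 4096;		/* assume 4K pages */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);

	if (p == MAP_FAILED)
		err(1, "mmap");
	/* tell UVM we intend to touch this range sequentially */
	if (madvise(p, len, MADV_SEQUENTIAL) == -1)
		err(1, "madvise");
	return 0;
}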
4391
4392 /*
4393 * uvm_map_extract: extract a mapping from a map and put it somewhere
4394 * in the kernel_map, setting protection to max_prot.
4395 *
4396 * => map should be unlocked (we will write lock it and kernel_map)
4397 * => returns 0 on success, error code otherwise
4398 * => start must be page aligned
4399 * => len must be page sized
4400 * => flags:
4401 * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
4402 * Mappings are QREF's.
4403 */
4404 int
4405 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
4406 vaddr_t *dstaddrp, int flags)
4407 {
4408 struct uvm_map_deadq dead;
4409 struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2;
4410 vaddr_t dstaddr;
4411 vaddr_t end;
4412 vaddr_t cp_start;
4413 vsize_t cp_len, cp_off;
4414 int error;
4415
4416 TAILQ_INIT(&dead);
4417 end = start + len;
4418
4419 /*
4420 * Sanity check on the parameters.
4421 * Also, since the mapping may not contain gaps, error out if the
4422 * mapped area is not in source map.
4423 */
4424 if ((start & (vaddr_t)PAGE_MASK) != 0 ||
4425 (end & (vaddr_t)PAGE_MASK) != 0 || end < start)
4426 return EINVAL;
4427 if (start < srcmap->min_offset || end > srcmap->max_offset)
4428 return EINVAL;
4429
4430 /* Initialize dead entries. Handle len == 0 case. */
4431 if (len == 0)
4432 return 0;
4433
4434 /* Acquire lock on srcmap. */
4435 vm_map_lock(srcmap);
4436
4437 /* Lock srcmap, lookup first and last entry in <start,len>. */
4438 first = uvm_map_entrybyaddr(&srcmap->addr, start);
4439
4440 /* Check that the range is contiguous. */
4441 for (entry = first; entry != NULL && entry->end < end;
4442 entry = RBT_NEXT(uvm_map_addr, entry)) {
4443 if (VMMAP_FREE_END(entry) != entry->end ||
4444 UVM_ET_ISHOLE(entry)) {
4445 error = EINVAL;
4446 goto fail;
4447 }
4448 }
4449 if (entry == NULL || UVM_ET_ISHOLE(entry)) {
4450 error = EINVAL;
4451 goto fail;
4452 }
4453
4454 /*
4455 * Handle need-copy flag.
4456 */
4457 for (entry = first; entry != NULL && entry->start < end;
4458 entry = RBT_NEXT(uvm_map_addr, entry)) {
4459 if (UVM_ET_ISNEEDSCOPY(entry))
4460 amap_copy(srcmap, entry, M_NOWAIT,
4461 UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end);
4462 if (UVM_ET_ISNEEDSCOPY(entry)) {
4463 /*
4464 * amap_copy failure
4465 */
4466 error = ENOMEM;
4467 goto fail;
4468 }
4469 }
4470
4471 /* Lock destination map (kernel_map). */
4472 vm_map_lock(kernel_map);
4473
4474 if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len,
4475 MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start),
4476 PROT_NONE, 0) != 0) {
4477 error = ENOMEM;
4478 goto fail2;
4479 }
4480 *dstaddrp = dstaddr;
4481
4482 /*
4483 * We now have srcmap and kernel_map locked.
4484 * dstaddr contains the destination offset in dstmap.
4485 */
4486 /* step 1: start looping through map entries, performing extraction. */
4487 for (entry = first; entry != NULL && entry->start < end;
4488 entry = RBT_NEXT(uvm_map_addr, entry)) {
4489 KDASSERT(!UVM_ET_ISNEEDSCOPY(entry));
4490 if (UVM_ET_ISHOLE(entry))
4491 continue;
4492
4493 /* Calculate uvm_mapent_clone parameters. */
4494 cp_start = entry->start;
4495 if (cp_start < start) {
4496 cp_off = start - cp_start;
4497 cp_start = start;
4498 } else
4499 cp_off = 0;
4500 cp_len = MIN(entry->end, end) - cp_start;
4501
4502 newentry = uvm_mapent_clone(kernel_map,
4503 cp_start - start + dstaddr, cp_len, cp_off,
4504 entry->protection, entry->max_protection,
4505 entry, &dead, flags, AMAP_SHARED | AMAP_REFALL);
4506 if (newentry == NULL) {
4507 error = ENOMEM;
4508 goto fail2_unmap;
4509 }
4510 kernel_map->size += cp_len;
4511
4512 /* Figure out the best protection */
4513 if ((flags & UVM_EXTRACT_FIXPROT) &&
4514 newentry->protection != PROT_NONE)
4515 newentry->protection = newentry->max_protection;
4516 newentry->protection &= ~PROT_EXEC;
4517
4518 /*
4519 * Step 2: perform pmap copy.
4520 * (Doing this in the loop saves one RB traversal.)
4521 */
4522 pmap_copy(kernel_map->pmap, srcmap->pmap,
4523 cp_start - start + dstaddr, cp_len, cp_start);
4524 }
4525 pmap_update(kernel_map->pmap);
4526
4527 error = 0;
4528
4529 /* Unmap copied entries on failure. */
4530 fail2_unmap:
4531 if (error) {
4532 uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead,
4533 FALSE, TRUE, FALSE);
4534 }
4535
4536 /* Release maps, release dead entries. */
4537 fail2:
4538 vm_map_unlock(kernel_map);
4539
4540 fail:
4541 vm_map_unlock(srcmap);
4542
4543 uvm_unmap_detach(&dead, 0);
4544
4545 return error;
4546 }
4547
4548 /*
4549 * uvm_map_clean: clean out a map range
4550 *
4551 * => valid flags:
4552 * if (flags & PGO_CLEANIT): dirty pages are cleaned first
4553 * if (flags & PGO_SYNCIO): dirty pages are written synchronously
4554 * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
4555 * if (flags & PGO_FREE): any cached pages are freed after clean
4556 * => returns an error if any part of the specified range isn't mapped
4557 * => never a need to flush amap layer since the anonymous memory has
4558 * no permanent home, but may deactivate pages there
4559 * => called from sys_msync() and sys_madvise()
4560 * => caller must not write-lock map (read OK).
4561 * => we may sleep while cleaning if SYNCIO [with map read-locked]
4562 */
4563
4564 int
4565 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
4566 {
4567 struct vm_map_entry *first, *entry;
4568 struct vm_amap *amap;
4569 struct vm_anon *anon;
4570 struct vm_page *pg;
4571 struct uvm_object *uobj;
4572 vaddr_t cp_start, cp_end;
4573 int refs;
4574 int error;
4575 boolean_t rv;
4576
4577 KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
4578 (PGO_FREE|PGO_DEACTIVATE));
4579
4580 if (start > end || start < map->min_offset || end > map->max_offset)
4581 return EINVAL;
4582
4583 vm_map_lock_read(map);
4584 first = uvm_map_entrybyaddr(&map->addr, start);
4585
4586 /* Make a first pass to check for holes. */
4587 for (entry = first; entry != NULL && entry->start < end;
4588 entry = RBT_NEXT(uvm_map_addr, entry)) {
4589 if (UVM_ET_ISSUBMAP(entry)) {
4590 vm_map_unlock_read(map);
4591 return EINVAL;
4592 }
4593 if (UVM_ET_ISSUBMAP(entry) ||
4594 UVM_ET_ISHOLE(entry) ||
4595 (entry->end < end &&
4596 VMMAP_FREE_END(entry) != entry->end)) {
4597 vm_map_unlock_read(map);
4598 return EFAULT;
4599 }
4600 }
4601
4602 error = 0;
4603 for (entry = first; entry != NULL && entry->start < end;
4604 entry = RBT_NEXT(uvm_map_addr, entry)) {
4605 amap = entry->aref.ar_amap; /* top layer */
4606 if (UVM_ET_ISOBJ(entry))
4607 uobj = entry->object.uvm_obj;
4608 else
4609 uobj = NULL;
4610
4611 /*
4612 * No amap cleaning necessary if:
4613 * - there's no amap
4614 * - we're not deactivating or freeing pages.
4615 */
4616 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
4617 goto flush_object;
4618
4619 cp_start = MAX(entry->start, start);
4620 cp_end = MIN(entry->end, end);
4621
4622 amap_lock(amap);
4623 for (; cp_start != cp_end; cp_start += PAGE_SIZE) {
4624 anon = amap_lookup(&entry->aref,
4625 cp_start - entry->start);
4626 if (anon == NULL)
4627 continue;
4628
4629 KASSERT(anon->an_lock == amap->am_lock);
4630 pg = anon->an_page;
4631 if (pg == NULL) {
4632 continue;
4633 }
4634 KASSERT(pg->pg_flags & PQ_ANON);
4635
4636 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
4637 /*
4638 * XXX In these first 3 cases, we always just
4639 * XXX deactivate the page. We may want to
4640 * XXX handle the different cases more
4641 * XXX specifically, in the future.
4642 */
4643 case PGO_CLEANIT|PGO_FREE:
4644 case PGO_CLEANIT|PGO_DEACTIVATE:
4645 case PGO_DEACTIVATE:
4646 deactivate_it:
4647 /* skip the page if it's wired */
4648 if (pg->wire_count != 0)
4649 break;
4650
4651 uvm_lock_pageq();
4652
4653 KASSERT(pg->uanon == anon);
4654
4655 /* zap all mappings for the page. */
4656 pmap_page_protect(pg, PROT_NONE);
4657
4658 /* ...and deactivate the page. */
4659 uvm_pagedeactivate(pg);
4660
4661 uvm_unlock_pageq();
4662 break;
4663 case PGO_FREE:
4664 /*
4665 * If there are multiple references to
4666 * the amap, just deactivate the page.
4667 */
4668 if (amap_refs(amap) > 1)
4669 goto deactivate_it;
4670
4671 /* XXX skip the page if it's wired */
4672 if (pg->wire_count != 0) {
4673 break;
4674 }
4675 amap_unadd(&entry->aref,
4676 cp_start - entry->start);
4677 refs = --anon->an_ref;
4678 if (refs == 0)
4679 uvm_anfree(anon);
4680 break;
4681 default:
4682 panic("uvm_map_clean: weird flags");
4683 }
4684 }
4685 amap_unlock(amap);
4686
4687 flush_object:
4688 cp_start = MAX(entry->start, start);
4689 cp_end = MIN(entry->end, end);
4690
4691 /*
4692 * flush pages if we've got a valid backing object.
4693 *
4694 * Don't PGO_FREE if we don't have write permission
4695 * and don't flush if this is a copy-on-write object
4696 * since we can't know our permissions on it.
4697 */
4698 if (uobj != NULL &&
4699 ((flags & PGO_FREE) == 0 ||
4700 ((entry->max_protection & PROT_WRITE) != 0 &&
4701 (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
4702 rw_enter(uobj->vmobjlock, RW_WRITE);
4703 rv = uobj->pgops->pgo_flush(uobj,
4704 cp_start - entry->start + entry->offset,
4705 cp_end - entry->start + entry->offset, flags);
4706 rw_exit(uobj->vmobjlock);
4707
4708 if (rv == FALSE)
4709 error = EFAULT;
4710 }
4711 }
4712
4713 vm_map_unlock_read(map);
4714 return error;
4715 }
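/*
 * Editor's sketch -- not part of uvm_map.c.  uvm_map_clean() is the
 * workhorse behind msync(2) and parts of madvise(2); roughly, MS_SYNC
 * corresponds to PGO_CLEANIT|PGO_SYNCIO and MS_INVALIDATE adds PGO_FREE,
 * but the exact flag translation lives in sys_msync(), so treat that
 * mapping as approximate.  Minimal userland example; the file path is
 * arbitrary and the length assumes one 4K page.
 */
#include <sys/mman.h>

#include <err.h>
#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
	size_t len = 4096;		/* assume one page */
	int fd = open("/tmp/msync-demo", O_RDWR | O_CREAT, 0600);

	if (fd == -1 || ftruncate(fd, len) == -1)
		err(1, "open/ftruncate");
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	p[0] = 'x';			/* dirty the page */
	/* synchronously write the dirty page back to the file */
	if (msync(p, len, MS_SYNC) == -1)
		err(1, "msync");
	close(fd);
	return 0;
}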
4716
4717 /*
4718 * UVM_MAP_CLIP_END implementation
4719 */
4720 void
4721 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4722 {
4723 struct vm_map_entry *tmp;
4724
4725 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4726 tmp = uvm_mapent_alloc(map, 0);
4727
4728 /* Invoke splitentry. */
4729 uvm_map_splitentry(map, entry, tmp, addr);
4730 }
4731
4732 /*
4733 * UVM_MAP_CLIP_START implementation
4734 *
4735 * Clippers are required to not change the pointers to the entry they are
4736 * clipping on.
4737 * Since uvm_map_splitentry turns the original entry into the lowest
4738 * entry (address wise) we do a swap between the new entry and the original
4739 * entry, prior to calling uvm_map_splitentry.
4740 */
4741 void
4742 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
4743 {
4744 struct vm_map_entry *tmp;
4745 struct uvm_addr_state *free;
4746
4747 /* Unlink original. */
4748 free = uvm_map_uaddr_e(map, entry);
4749 uvm_mapent_free_remove(map, free, entry);
4750 uvm_mapent_addr_remove(map, entry);
4751
4752 /* Copy entry. */
4753 KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
4754 tmp = uvm_mapent_alloc(map, 0);
4755 uvm_mapent_copy(entry, tmp);
4756
4757 /* Put new entry in place of original entry. */
4758 uvm_mapent_addr_insert(map, tmp);
4759 uvm_mapent_free_insert(map, free, tmp);
4760
4761 /* Invoke splitentry. */
4762 uvm_map_splitentry(map, tmp, entry, addr);
4763 }
4764
4765 /*
4766 * Boundary fixer.
4767 */
4768 static inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t);
4769 static inline vaddr_t
4770 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound)
4771 {
4772 return (min < bound && max > bound) ? bound : max;
4773 }
4774
4775 /*
4776 * Choose free list based on address at start of free space.
4777 *
4778 * The uvm_addr_state returned contains addr and is the first of:
4779 * - uaddr_exe
4780 * - uaddr_brk_stack
4781 * - uaddr_any
4782 */
4783 struct uvm_addr_state*
4784 uvm_map_uaddr(struct vm_map *map, vaddr_t addr)
4785 {
4786 struct uvm_addr_state *uaddr;
4787 int i;
4788
4789 /* Special case the first page, to prevent mmap from returning 0. */
4790 if (addr < VMMAP_MIN_ADDR)
4791 return NULL;
4792
4793 /* Upper bound for kernel maps at uvm_maxkaddr. */
4794 if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
4795 if (addr >= uvm_maxkaddr)
4796 return NULL;
4797 }
4798
4799 /* Is the address inside the exe-only map? */
4800 if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr &&
4801 addr < map->uaddr_exe->uaddr_maxaddr)
4802 return map->uaddr_exe;
4803
4804 /* Check if the space falls inside brk/stack area. */
4805 if ((addr >= map->b_start && addr < map->b_end) ||
4806 (addr >= map->s_start && addr < map->s_end)) {
4807 if (map->uaddr_brk_stack != NULL &&
4808 addr >= map->uaddr_brk_stack->uaddr_minaddr &&
4809 addr < map->uaddr_brk_stack->uaddr_maxaddr) {
4810 return map->uaddr_brk_stack;
4811 } else
4812 return NULL;
4813 }
4814
4815 /*
4816 * Check the other selectors.
4817 *
4818 * These selectors are only marked as the owner, if they have insert
4819 * functions.
4820 */
4821 for (i = 0; i < nitems(map->uaddr_any); i++) {
4822 uaddr = map->uaddr_any[i];
4823 if (uaddr == NULL)
4824 continue;
4825 if (uaddr->uaddr_functions->uaddr_free_insert == NULL)
4826 continue;
4827
4828 if (addr >= uaddr->uaddr_minaddr &&
4829 addr < uaddr->uaddr_maxaddr)
4830 return uaddr;
4831 }
4832
4833 return NULL;
4834 }
4835
4836 /*
4837 * Choose free list based on address at start of free space.
4838 *
4839 * The uvm_addr_state returned contains addr and is the first of:
4840 * - uaddr_exe
4841 * - uaddr_brk_stack
4842 * - uaddr_any
4843 */
4844 struct uvm_addr_state*
4845 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry)
4846 {
4847 return uvm_map_uaddr(map, VMMAP_FREE_START(entry));
4848 }
4849
4850 /*
4851 * Returns the first free-memory boundary that is crossed by [min-max].
4852 */
4853 vsize_t
4854 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max)
4855 {
4856 struct uvm_addr_state *uaddr;
4857 int i;
4858
4859 /* Never return first page. */
4860 max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR);
4861
4862 /* Treat the maxkaddr special, if the map is a kernel_map. */
4863 if ((map->flags & VM_MAP_ISVMSPACE) == 0)
4864 max = uvm_map_boundfix(min, max, uvm_maxkaddr);
4865
4866 /* Check for exe-only boundaries. */
4867 if (map->uaddr_exe != NULL) {
4868 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
4869 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
4870 }
4871
4872 	/* Check for brk/stack boundaries. */
4873 if (map->uaddr_brk_stack != NULL) {
4874 max = uvm_map_boundfix(min, max,
4875 map->uaddr_brk_stack->uaddr_minaddr);
4876 max = uvm_map_boundfix(min, max,
4877 map->uaddr_brk_stack->uaddr_maxaddr);
4878 }
4879
4880 /* Check other boundaries. */
4881 for (i = 0; i < nitems(map->uaddr_any); i++) {
4882 uaddr = map->uaddr_any[i];
4883 if (uaddr != NULL) {
4884 max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
4885 max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
4886 }
4887 }
4888
4889 /* Boundaries at stack and brk() area. */
4890 max = uvm_map_boundfix(min, max, map->s_start);
4891 max = uvm_map_boundfix(min, max, map->s_end);
4892 max = uvm_map_boundfix(min, max, map->b_start);
4893 max = uvm_map_boundfix(min, max, map->b_end);
4894
4895 return max;
4896 }
4897
4898 /*
4899 * Update map allocation start and end addresses from proc vmspace.
4900 */
4901 void
4902 uvm_map_vmspace_update(struct vm_map *map,
4903 struct uvm_map_deadq *dead, int flags)
4904 {
4905 struct vmspace *vm;
4906 vaddr_t b_start, b_end, s_start, s_end;
4907
4908 KASSERT(map->flags & VM_MAP_ISVMSPACE);
4909 KASSERT(offsetof(struct vmspace, vm_map) == 0);
4910
4911 /*
4912 * Derive actual allocation boundaries from vmspace.
4913 */
4914 vm = (struct vmspace *)map;
4915 b_start = (vaddr_t)vm->vm_daddr;
4916 b_end = b_start + BRKSIZ;
4917 s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4918 s_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
4919 #ifdef DIAGNOSTIC
4920 if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
4921 (b_end & (vaddr_t)PAGE_MASK) != 0 ||
4922 (s_start & (vaddr_t)PAGE_MASK) != 0 ||
4923 (s_end & (vaddr_t)PAGE_MASK) != 0) {
4924 panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
4925 "b=0x%lx-0x%lx s=0x%lx-0x%lx",
4926 vm, b_start, b_end, s_start, s_end);
4927 }
4928 #endif
4929
4930 if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
4931 map->s_start == s_start && map->s_end == s_end))
4932 return;
4933
4934 uvm_map_freelist_update(map, dead, b_start, b_end,
4935 s_start, s_end, flags);
4936 }
4937
4938 /*
4939 * Grow kernel memory.
4940 *
4941 * This function is only called for kernel maps when an allocation fails.
4942 *
4943 * If the map has a gap that is large enough to accommodate alloc_sz, this
4944 * function will make sure map->free will include it.
4945 */
4946 void
4947 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
4948 vsize_t alloc_sz, int flags)
4949 {
4950 vsize_t sz;
4951 vaddr_t end;
4952 struct vm_map_entry *entry;
4953
4954 /* Kernel memory only. */
4955 KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
4956 /* Destroy free list. */
4957 uvm_map_freelist_update_clear(map, dead);
4958
4959 /* Include the guard page in the hard minimum requirement of alloc_sz. */
4960 if (map->flags & VM_MAP_GUARDPAGES)
4961 alloc_sz += PAGE_SIZE;
4962
4963 /*
4964 * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
4965 *
4966 * Don't handle the case where the multiplication overflows:
4967 * if that happens, the allocation is probably too big anyway.
4968 */
4969 sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);
4970
4971 /*
4972 * Walk forward until a gap large enough for alloc_sz shows up.
4973 *
4974 * We assume the kernel map has no boundaries.
4975 * uvm_maxkaddr may be zero.
4976 */
4977 end = MAX(uvm_maxkaddr, map->min_offset);
4978 entry = uvm_map_entrybyaddr(&map->addr, end);
4979 while (entry && entry->fspace < alloc_sz)
4980 entry = RBT_NEXT(uvm_map_addr, entry);
4981 if (entry) {
4982 end = MAX(VMMAP_FREE_START(entry), end);
4983 end += MIN(sz, map->max_offset - end);
4984 } else
4985 end = map->max_offset;
4986
4987 /* Reserve pmap entries. */
4988 #ifdef PMAP_GROWKERNEL
4989 uvm_maxkaddr = pmap_growkernel(end);
4990 #else
4991 uvm_maxkaddr = MAX(uvm_maxkaddr, end);
4992 #endif
4993
4994 /* Rebuild free list. */
4995 uvm_map_freelist_update_refill(map, flags);
4996 }
4997
4998 /*
4999 * Freelist update subfunction: unlink all entries from freelists.
5000 */
5001 void
5002 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead)
5003 {
5004 struct uvm_addr_state *free;
5005 struct vm_map_entry *entry, *prev, *next;
5006
5007 prev = NULL;
5008 for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL;
5009 entry = next) {
5010 next = RBT_NEXT(uvm_map_addr, entry);
5011
5012 free = uvm_map_uaddr_e(map, entry);
5013 uvm_mapent_free_remove(map, free, entry);
5014
5015 if (prev != NULL && entry->start == entry->end) {
5016 prev->fspace += VMMAP_FREE_END(entry) - entry->end;
5017 uvm_mapent_addr_remove(map, entry);
5018 DEAD_ENTRY_PUSH(dead, entry);
5019 } else
5020 prev = entry;
5021 }
5022 }
5023
5024 /*
5025 * Freelist update subfunction: refill the freelists with entries.
5026 */
5027 void
5028 uvm_map_freelist_update_refill(struct vm_map *map, int flags)
5029 {
5030 struct vm_map_entry *entry;
5031 vaddr_t min, max;
5032
5033 RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
5034 min = VMMAP_FREE_START(entry);
5035 max = VMMAP_FREE_END(entry);
5036 entry->fspace = 0;
5037
5038 entry = uvm_map_fix_space(map, entry, min, max, flags);
5039 }
5040
5041 uvm_tree_sanity(map, __FILE__, __LINE__);
5042 }
5043
5044 /*
5045 * Change {a,b}_{start,end} allocation ranges and associated free lists.
5046 */
5047 void
5048 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead,
5049 vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags)
5050 {
5051 KDASSERT(b_end >= b_start && s_end >= s_start);
5052 vm_map_assert_wrlock(map);
5053
5054 /* Clear all free lists. */
5055 uvm_map_freelist_update_clear(map, dead);
5056
5057 /* Apply new bounds. */
5058 map->b_start = b_start;
5059 map->b_end = b_end;
5060 map->s_start = s_start;
5061 map->s_end = s_end;
5062
5063 /* Refill free lists. */
5064 uvm_map_freelist_update_refill(map, flags);
5065 }
5066
5067 /*
5068 * Assign a uvm_addr_state to the specified pointer in vm_map.
5069 *
5070 * May sleep.
5071 */
5072 void
5073 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which,
5074 struct uvm_addr_state *newval)
5075 {
5076 struct uvm_map_deadq dead;
5077
5078 /* Pointer which must be in this map. */
5079 KASSERT(which != NULL);
5080 KASSERT((void*)map <= (void*)(which) &&
5081 (void*)(which) < (void*)(map + 1));
5082
5083 vm_map_lock(map);
5084 TAILQ_INIT(&dead);
5085 uvm_map_freelist_update_clear(map, &dead);
5086
5087 uvm_addr_destroy(*which);
5088 *which = newval;
5089
5090 uvm_map_freelist_update_refill(map, 0);
5091 vm_map_unlock(map);
5092 uvm_unmap_detach(&dead, 0);
5093 }
5094
5095 /*
5096 * Correct space insert.
5097 *
5098 * Entry must not be on any freelist.
5099 */
5100 struct vm_map_entry*
5101 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry,
5102 vaddr_t min, vaddr_t max, int flags)
5103 {
5104 struct uvm_addr_state *free, *entfree;
5105 vaddr_t lmax;
5106
5107 KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0);
5108 KDASSERT(min <= max);
5109 KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) ||
5110 min == map->min_offset);
5111
5112 UVM_MAP_REQ_WRITE(map);
5113
5114 /*
5115 * During the function, entfree will always point at the uaddr state
5116 * for entry.
5117 */
5118 entfree = (entry == NULL ? NULL :
5119 uvm_map_uaddr_e(map, entry));
5120
5121 while (min != max) {
5122 /* Claim guard page for entry. */
5123 if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL &&
5124 VMMAP_FREE_END(entry) == entry->end &&
5125 entry->start != entry->end) {
5126 if (max - min == 2 * PAGE_SIZE) {
5127 /*
5128 * If the free-space gap is exactly 2 pages,
5129 * we make the guard 2 pages instead of 1.
5130 * Because in a guarded map, an area needs
5131 * at least 2 pages to allocate from:
5132 * one page for the allocation and one for
5133 * the guard.
5134 */
5135 entry->guard = 2 * PAGE_SIZE;
5136 min = max;
5137 } else {
5138 entry->guard = PAGE_SIZE;
5139 min += PAGE_SIZE;
5140 }
5141 continue;
5142 }
5143
5144 /*
5145 * Handle the case where entry has a 2-page guard, but the
5146 * space after entry is freed.
5147 */
5148 if (entry != NULL && entry->fspace == 0 &&
5149 entry->guard > PAGE_SIZE) {
5150 entry->guard = PAGE_SIZE;
5151 min = VMMAP_FREE_START(entry);
5152 }
5153
5154 lmax = uvm_map_boundary(map, min, max);
5155 free = uvm_map_uaddr(map, min);
5156
5157 /*
5158 * Entries are merged if they point at the same uvm_free().
5159 * Exception to that rule: if min == uvm_maxkaddr, a new
5160 * entry is started regardless (otherwise the allocators
5161 * will get confused).
5162 */
5163 if (entry != NULL && free == entfree &&
5164 !((map->flags & VM_MAP_ISVMSPACE) == 0 &&
5165 min == uvm_maxkaddr)) {
5166 KDASSERT(VMMAP_FREE_END(entry) == min);
5167 entry->fspace += lmax - min;
5168 } else {
5169 /*
5170 			 * Commit entry to free list: it will not grow any
5171 			 * further.
5172 * We'll start a new entry and add to that entry
5173 * instead.
5174 */
5175 if (entry != NULL)
5176 uvm_mapent_free_insert(map, entfree, entry);
5177
5178 /* New entry for new uaddr. */
5179 entry = uvm_mapent_alloc(map, flags);
5180 KDASSERT(entry != NULL);
5181 entry->end = entry->start = min;
5182 entry->guard = 0;
5183 entry->fspace = lmax - min;
5184 entry->object.uvm_obj = NULL;
5185 entry->offset = 0;
5186 entry->etype = 0;
5187 entry->protection = entry->max_protection = 0;
5188 entry->inheritance = 0;
5189 entry->wired_count = 0;
5190 entry->advice = 0;
5191 entry->aref.ar_pageoff = 0;
5192 entry->aref.ar_amap = NULL;
5193 uvm_mapent_addr_insert(map, entry);
5194
5195 entfree = free;
5196 }
5197
5198 min = lmax;
5199 }
5200 /* Finally put entry on the uaddr state. */
5201 if (entry != NULL)
5202 uvm_mapent_free_insert(map, entfree, entry);
5203
5204 return entry;
5205 }
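/*
 * Editor's note -- illustration only, not part of uvm_map.c.  The guard
 * rule above, pulled out as a self-contained model: in a guarded kernel
 * map an entry normally donates one page of the free space that follows
 * it as a guard, except when exactly two pages are free, in which case
 * both become guard so no unusable single-page gap is left behind (an
 * allocation there would need one page for itself plus one for its own
 * guard).  The helper name is hypothetical.
 */
static unsigned long
guard_size_model(unsigned long gap, unsigned long page_size)
{
	if (gap == 2 * page_size)
		return 2 * page_size;	/* consume the whole 2-page gap */
	return page_size;		/* normal single guard page */
}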
5206
5207 /*
5208 * MQuery style of allocation.
5209 *
5210 * This allocator searches forward until sufficient space is found to map
5211 * the given size.
5212 *
5213 * XXX: factor in offset (via pmap_prefer) and protection?
5214 */
5215 int
5216 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset,
5217 int flags)
5218 {
5219 struct vm_map_entry *entry, *last;
5220 vaddr_t addr;
5221 vaddr_t tmp, pmap_align, pmap_offset;
5222 int error;
5223
5224 addr = *addr_p;
5225 vm_map_lock_read(map);
5226
5227 /* Configure pmap prefer. */
5228 if (offset != UVM_UNKNOWN_OFFSET) {
5229 pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN());
5230 pmap_offset = PMAP_PREFER_OFFSET(offset);
5231 } else {
5232 pmap_align = PAGE_SIZE;
5233 pmap_offset = 0;
5234 }
5235
5236 /* Align address to pmap_prefer unless FLAG_FIXED is set. */
5237 if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) {
5238 tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5239 if (tmp < addr)
5240 tmp += pmap_align;
5241 addr = tmp;
5242 }
5243
5244 /* First, check if the requested range is fully available. */
5245 entry = uvm_map_entrybyaddr(&map->addr, addr);
5246 last = NULL;
5247 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5248 error = 0;
5249 goto out;
5250 }
5251 if (flags & UVM_FLAG_FIXED) {
5252 error = EINVAL;
5253 goto out;
5254 }
5255
5256 error = ENOMEM; /* Default error from here. */
5257
5258 /*
5259 * At this point, the memory at <addr, sz> is not available.
5260 * The reasons are:
5261 * [1] it's outside the map,
5262 * [2] it starts in used memory (and therefore needs to move
5263 * toward the first free page in entry),
5264 * [3] it starts in free memory but bumps into used memory.
5265 *
5266 * Note that for case [2], the forward moving is handled by the
5267 * for loop below.
5268 */
5269 if (entry == NULL) {
5270 /* [1] Outside the map. */
5271 if (addr >= map->max_offset)
5272 goto out;
5273 else
5274 entry = RBT_MIN(uvm_map_addr, &map->addr);
5275 } else if (VMMAP_FREE_START(entry) <= addr) {
5276 /* [3] Bumped into used memory. */
5277 entry = RBT_NEXT(uvm_map_addr, entry);
5278 }
5279
5280 /* Test if the next entry is sufficient for the allocation. */
5281 for (; entry != NULL;
5282 entry = RBT_NEXT(uvm_map_addr, entry)) {
5283 if (entry->fspace == 0)
5284 continue;
5285 addr = VMMAP_FREE_START(entry);
5286
5287 restart: /* Restart address checks on address change. */
5288 tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
5289 if (tmp < addr)
5290 tmp += pmap_align;
5291 addr = tmp;
5292 if (addr >= VMMAP_FREE_END(entry))
5293 continue;
5294
5295 /* Skip brk() allocation addresses. */
5296 if (addr + sz > map->b_start && addr < map->b_end) {
5297 if (VMMAP_FREE_END(entry) > map->b_end) {
5298 addr = map->b_end;
5299 goto restart;
5300 } else
5301 continue;
5302 }
5303 /* Skip stack allocation addresses. */
5304 if (addr + sz > map->s_start && addr < map->s_end) {
5305 if (VMMAP_FREE_END(entry) > map->s_end) {
5306 addr = map->s_end;
5307 goto restart;
5308 } else
5309 continue;
5310 }
5311
5312 last = NULL;
5313 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
5314 error = 0;
5315 goto out;
5316 }
5317 }
5318
5319 out:
5320 vm_map_unlock_read(map);
5321 if (error == 0)
5322 *addr_p = addr;
5323 return error;
5324 }
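/*
 * Editor's sketch -- not part of uvm_map.c.  This allocator backs
 * mquery(2), which asks "where could a mapping of this size go at or
 * after this hint?" without actually creating it.  The flag choice
 * below (anonymous query, fd == -1) is my best guess at typical usage;
 * the hint address and length are arbitrary.
 */
#include <sys/mman.h>

#include <err.h>
#include <stdio.h>

int
main(void)
{
	size_t len = 16 * 4096;			/* assume 4K pages */
	void *hint = (void *)0x1000000;		/* arbitrary hint address */
	void *where = mquery(hint, len, PROT_READ | PROT_WRITE,
	    MAP_ANON, -1, 0);

	if (where == MAP_FAILED)
		err(1, "mquery");
	printf("a %zu byte mapping would fit at %p\n", len, where);
	return 0;
}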
5325
5326 boolean_t
5327 vm_map_lock_try_ln(struct vm_map *map, char *file, int line)
5328 {
5329 boolean_t rv;
5330
5331 if (map->flags & VM_MAP_INTRSAFE) {
5332 rv = mtx_enter_try(&map->mtx);
5333 } else {
5334 mtx_enter(&map->flags_lock);
5335 if (map->flags & VM_MAP_BUSY) {
5336 mtx_leave(&map->flags_lock);
5337 return (FALSE);
5338 }
5339 mtx_leave(&map->flags_lock);
5340 rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0);
5341 /* check if the lock is busy and back out if we won the race */
5342 if (rv) {
5343 mtx_enter(&map->flags_lock);
5344 if (map->flags & VM_MAP_BUSY) {
5345 rw_exit(&map->lock);
5346 rv = FALSE;
5347 }
5348 mtx_leave(&map->flags_lock);
5349 }
5350 }
5351
5352 if (rv) {
5353 map->timestamp++;
5354 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
5355 uvm_tree_sanity(map, file, line);
5356 uvm_tree_size_chk(map, file, line);
5357 }
5358
5359 return (rv);
5360 }
5361
5362 void
5363 vm_map_lock_ln(struct vm_map *map, char *file, int line)
5364 {
5365 if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5366 do {
5367 mtx_enter(&map->flags_lock);
5368 tryagain:
5369 while (map->flags & VM_MAP_BUSY) {
5370 map->flags |= VM_MAP_WANTLOCK;
5371 msleep_nsec(&map->flags, &map->flags_lock,
5372 PVM, vmmapbsy, INFSLP);
5373 }
5374 mtx_leave(&map->flags_lock);
5375 } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0);
5376 /* check if the lock is busy and back out if we won the race */
5377 mtx_enter(&map->flags_lock);
5378 if (map->flags & VM_MAP_BUSY) {
5379 rw_exit(&map->lock);
5380 goto tryagain;
5381 }
5382 mtx_leave(&map->flags_lock);
5383 } else {
5384 mtx_enter(&map->mtx);
5385 }
5386
5387 map->timestamp++;
5388 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
5389 uvm_tree_sanity(map, file, line);
5390 uvm_tree_size_chk(map, file, line);
5391 }
5392
5393 void
5394 vm_map_lock_read_ln(struct vm_map *map, char *file, int line)
5395 {
5396 if ((map->flags & VM_MAP_INTRSAFE) == 0)
5397 rw_enter_read(&map->lock);
5398 else
5399 mtx_enter(&map->mtx);
5400 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
5401 uvm_tree_sanity(map, file, line);
5402 uvm_tree_size_chk(map, file, line);
5403 }
5404
5405 void
5406 vm_map_unlock_ln(struct vm_map *map, char *file, int line)
5407 {
5408 uvm_tree_sanity(map, file, line);
5409 uvm_tree_size_chk(map, file, line);
5410 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5411 if ((map->flags & VM_MAP_INTRSAFE) == 0)
5412 rw_exit(&map->lock);
5413 else
5414 mtx_leave(&map->mtx);
5415 }
5416
5417 void
5418 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line)
5419 {
5420 /* XXX: RO */ uvm_tree_sanity(map, file, line);
5421 /* XXX: RO */ uvm_tree_size_chk(map, file, line);
5422 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5423 if ((map->flags & VM_MAP_INTRSAFE) == 0)
5424 rw_exit_read(&map->lock);
5425 else
5426 mtx_leave(&map->mtx);
5427 }
5428
5429 void
5430 vm_map_downgrade_ln(struct vm_map *map, char *file, int line)
5431 {
5432 uvm_tree_sanity(map, file, line);
5433 uvm_tree_size_chk(map, file, line);
5434 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5435 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
5436 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5437 if ((map->flags & VM_MAP_INTRSAFE) == 0)
5438 rw_enter(&map->lock, RW_DOWNGRADE);
5439 }
5440
5441 void
5442 vm_map_upgrade_ln(struct vm_map *map, char *file, int line)
5443 {
5444 /* XXX: RO */ uvm_tree_sanity(map, file, line);
5445 /* XXX: RO */ uvm_tree_size_chk(map, file, line);
5446 LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
5447 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5448 if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5449 rw_exit_read(&map->lock);
5450 rw_enter_write(&map->lock);
5451 }
5452 LPRINTF(("map lock: %p (at %s %d)\n", map, file, line));
5453 uvm_tree_sanity(map, file, line);
5454 }
5455
5456 void
5457 vm_map_busy_ln(struct vm_map *map, char *file, int line)
5458 {
5459 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5460 mtx_enter(&map->flags_lock);
5461 map->flags |= VM_MAP_BUSY;
5462 mtx_leave(&map->flags_lock);
5463 }
5464
5465 void
5466 vm_map_unbusy_ln(struct vm_map *map, char *file, int line)
5467 {
5468 int oflags;
5469
5470 KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
5471 mtx_enter(&map->flags_lock);
5472 oflags = map->flags;
5473 map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK);
5474 mtx_leave(&map->flags_lock);
5475 if (oflags & VM_MAP_WANTLOCK)
5476 wakeup(&map->flags);
5477 }
5478
5479 void
5480 vm_map_assert_anylock_ln(struct vm_map *map, char *file, int line)
5481 {
5482 LPRINTF(("map assert read or write locked: %p (at %s %d)\n", map, file, line));
5483 if ((map->flags & VM_MAP_INTRSAFE) == 0)
5484 rw_assert_anylock(&map->lock);
5485 else
5486 MUTEX_ASSERT_LOCKED(&map->mtx);
5487 }
5488
5489 void
5490 vm_map_assert_wrlock_ln(struct vm_map *map, char *file, int line)
5491 {
5492 LPRINTF(("map assert write locked: %p (at %s %d)\n", map, file, line));
5493 if ((map->flags & VM_MAP_INTRSAFE) == 0) {
5494 splassert(IPL_NONE);
5495 rw_assert_wrlock(&map->lock);
5496 } else
5497 MUTEX_ASSERT_LOCKED(&map->mtx);
5498 }
5499
5500 #ifndef SMALL_KERNEL
5501 int
5502 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve,
5503 size_t *lenp)
5504 {
5505 struct vm_map_entry *entry;
5506 vaddr_t start;
5507 int cnt, maxcnt, error = 0;
5508
5509 KASSERT(*lenp > 0);
5510 KASSERT((*lenp % sizeof(*kve)) == 0);
5511 cnt = 0;
5512 maxcnt = *lenp / sizeof(*kve);
5513 KASSERT(maxcnt > 0);
5514
5515 /*
5516 * Return only entries whose address is above the given base
5517 * address. This allows userland to iterate without knowing the
5518 * number of entries beforehand.
5519 */
5520 start = (vaddr_t)kve[0].kve_start;
5521
5522 vm_map_lock(map);
5523 RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
5524 if (cnt == maxcnt) {
5525 error = ENOMEM;
5526 break;
5527 }
5528 if (start != 0 && entry->start < start)
5529 continue;
5530 kve->kve_start = entry->start;
5531 kve->kve_end = entry->end;
5532 kve->kve_guard = entry->guard;
5533 kve->kve_fspace = entry->fspace;
5534 kve->kve_fspace_augment = entry->fspace_augment;
5535 kve->kve_offset = entry->offset;
5536 kve->kve_wired_count = entry->wired_count;
5537 kve->kve_etype = entry->etype;
5538 kve->kve_protection = entry->protection;
5539 kve->kve_max_protection = entry->max_protection;
5540 kve->kve_advice = entry->advice;
5541 kve->kve_inheritance = entry->inheritance;
5542 kve->kve_flags = entry->flags;
5543 kve++;
5544 cnt++;
5545 }
5546 vm_map_unlock(map);
5547
5548 KASSERT(cnt <= maxcnt);
5549
5550 *lenp = sizeof(*kve) * cnt;
5551 return error;
5552 }
5553 #endif
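/*
 * Editor's sketch -- not part of uvm_map.c, and hedged: as I read it,
 * uvm_map_fill_vmmap() serves the KERN_PROC_VMMAP sysctl used by
 * procmap(1).  The mib layout and iteration protocol below (seed
 * kve[0].kve_start with the last end address seen, loop until fewer
 * entries than requested come back) are my reading of the code above;
 * verify against sysctl(2) before relying on them.  A single fixed-size
 * buffer is used here for brevity.
 */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <err.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct kinfo_vmentry kve[64];
	int mib[3] = { CTL_KERN, KERN_PROC_VMMAP, getpid() };
	size_t len = sizeof(kve);
	size_t i;

	memset(kve, 0, sizeof(kve));	/* kve[0].kve_start == 0: from start */
	if (sysctl(mib, 3, kve, &len, NULL, 0) == -1 && errno != ENOMEM)
		err(1, "sysctl");
	for (i = 0; i < len / sizeof(kve[0]); i++)
		printf("0x%llx-0x%llx prot %x\n",
		    (unsigned long long)kve[i].kve_start,
		    (unsigned long long)kve[i].kve_end,
		    kve[i].kve_protection);
	return 0;
}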
5554
5555
5556 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry,
5557 uvm_mapentry_addrcmp, uvm_map_addr_augment);
5558
5559
5560 /*
5561 * MD code: vmspace allocator setup.
5562 */
5563
5564 #ifdef __i386__
5565 void
5566 uvm_map_setup_md(struct vm_map *map)
5567 {
5568 vaddr_t min, max;
5569
5570 min = map->min_offset;
5571 max = map->max_offset;
5572
5573 /*
5574 * Ensure the selectors will not try to manage page 0;
5575 * it's too special.
5576 */
5577 if (min < VMMAP_MIN_ADDR)
5578 min = VMMAP_MIN_ADDR;
5579
5580 #if 0 /* Cool stuff, not yet */
5581 /* Executable code is special. */
5582 map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR);
5583 /* Place normal allocations beyond executable mappings. */
5584 map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max);
5585 #else /* Crappy stuff, for now */
5586 map->uaddr_any[0] = uaddr_rnd_create(min, max);
5587 #endif
5588
5589 #ifndef SMALL_KERNEL
5590 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5591 #endif /* !SMALL_KERNEL */
5592 }
5593 #elif __LP64__
5594 void
5595 uvm_map_setup_md(struct vm_map *map)
5596 {
5597 vaddr_t min, max;
5598
5599 min = map->min_offset;
5600 max = map->max_offset;
5601
5602 /*
5603 * Ensure the selectors will not try to manage page 0;
5604 * it's too special.
5605 */
5606 if (min < VMMAP_MIN_ADDR)
5607 min = VMMAP_MIN_ADDR;
5608
5609 #if 0 /* Cool stuff, not yet */
5610 map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max);
5611 #else /* Crappy stuff, for now */
5612 map->uaddr_any[0] = uaddr_rnd_create(min, max);
5613 #endif
5614
5615 #ifndef SMALL_KERNEL
5616 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5617 #endif /* !SMALL_KERNEL */
5618 }
5619 #else /* non-i386, 32 bit */
5620 void
5621 uvm_map_setup_md(struct vm_map *map)
5622 {
5623 vaddr_t min, max;
5624
5625 min = map->min_offset;
5626 max = map->max_offset;
5627
5628 /*
5629 * Ensure the selectors will not try to manage page 0;
5630 * it's too special.
5631 */
5632 if (min < VMMAP_MIN_ADDR)
5633 min = VMMAP_MIN_ADDR;
5634
5635 #if 0 /* Cool stuff, not yet */
5636 map->uaddr_any[3] = uaddr_pivot_create(min, max);
5637 #else /* Crappy stuff, for now */
5638 map->uaddr_any[0] = uaddr_rnd_create(min, max);
5639 #endif
5640
5641 #ifndef SMALL_KERNEL
5642 map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
5643 #endif /* !SMALL_KERNEL */
5644 }
5645 #endif