FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_map.c
1 /*
2 * Mach Operating System
3 * Copyright (c) 1993-1987 Carnegie Mellon University
4 * All Rights Reserved.
5 *
6 * Permission to use, copy, modify and distribute this software and its
7 * documentation is hereby granted, provided that both the copyright
8 * notice and this permission notice appear in all copies of the
9 * software, derivative works or modified versions, and any portions
10 * thereof, and that both notices appear in supporting documentation.
11 *
12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
13 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15 *
16 * Carnegie Mellon requests users of this software to return to
17 *
18 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
19 * School of Computer Science
20 * Carnegie Mellon University
21 * Pittsburgh PA 15213-3890
22 *
23 * any improvements or extensions that they make and grant Carnegie Mellon
24 * the rights to redistribute these changes.
25 */
26 /*
27 * HISTORY
28 * $Log: vm_map.c,v $
29 * Revision 2.44 93/08/10 15:12:59 mrt
30 * Included support for projected buffers. The projected_on field of new
31 * vm map entries is defaulted to null. projected_buffer_collect is called
32 * from vm_map_deallocate to garbage collect projected buffers when a task
33 * terminates. Projected buffer entries are cloned exactly on map inheritance.
34 * The user is denied direct deallocation of non-persistent projected buffers
35 * (these need to be deallocated by projected_buffer_deallocate, presumably
36 * by the device driver that created it).
37 * [93/02/16 09:37:28 jcb]
38 *
39 * Revision 2.43 93/01/14 18:01:12 danner
40 * 64bit cleanup.
41 * [92/12/01 af]
42 *
43 * Revision 2.42 92/08/03 18:00:59 jfriedl
44 * removed silly prototypes
45 * [92/08/02 jfriedl]
46 *
47 * Revision 2.41 92/05/23 12:08:19 jfriedl
48 * Removed unused variables. Some cleanup to quiet gcc warnings.
49 * [92/05/16 jfriedl]
50 *
51 * Revision 2.40 92/04/06 01:15:10 rpd
52 * Fixed vm_map_copyout_page_list to mark the pages dirty. From dlb.
53 * [92/04/05 rpd]
54 *
55 * Revision 2.39 92/04/01 19:36:48 rpd
56 * Removed pmap_remove_attributes.
57 * [92/03/25 rpd]
58 * Always need vm_map_copy_discard_cont (not just NORMA_IPC).
59 * Add continuation recognition optimization to vm_map_copy_discard
60 * to avoid tail recursion when vm_map_copy_discard_cont is used.
61 * [92/03/20 14:14:00 dlb]
62 *
63 * Move inheritance arg check out of vm_map_inherit.
64 * Its callers must do this.
65 * [92/02/25 16:59:30 dlb]
66 *
67 * Revision 2.38 92/03/03 00:44:56 rpd
68 * Another wiring fix for vm_map_copyout_page_list. From dlb.
69 * [92/03/02 rpd]
70 *
71 * Revision 2.37 92/03/01 15:15:20 rpd
72 * Fixed must_wire code in vm_map_copyout_page_list.
73 * Fixed vm_map_fork/VM_MAP_INHERIT_SHARE. From dlb.
74 * [92/03/01 rpd]
75 *
76 * Revision 2.36 92/02/23 19:51:30 elf
77 * Remove all keep_wired logic. wiring_required logic in
78 * vm_map_copyin_page_list is the replacement.
79 * [92/02/21 10:15:26 dlb]
80 *
81 * Change wiring_allowed to wiring_required. Pay attention to
82 * it in vm_map_copyout_page list. This is only for the default
83 * pager -- to make it fully general, vm_map_copyout has to
84 * be modified as well - see XXX comment at bottom of routine.
85 * [92/02/20 15:18:13 dlb]
86 *
87 * Use object->shadowed and object->use_shared copy to
88 * detect sharing instead of the reference count.
89 * [92/02/19 17:39:10 dlb]
90 *
91 * Use is_shared field in map entries to detect sharing.
92 * Some minor bug fixes to the code that eliminates
93 * sharing maps.
94 * [92/02/19 14:25:30 dlb]
95 *
96 * Use object->use_shared_copy instead of new copy strategy.
97 * Removed all sharing map logic. Rewrote comments to reflect this.
98 * vm_map_verify_done is now a macro in vm_map.h as a result.
99 *
100 * First cut (commented out) at vm_map_copy_overwrite optimization
101 * to insert entries from copy into target, still needs work.
102 *
103 * Removed (bogus) single_use argument from vm_map_lookup().
104 *
105 * Replace share map logic in vm_map_fork with asynchronous
106 * copy-on-write. Check for null object in vm_map_entry_delete.
107 * [92/01/06 16:22:17 dlb]
108 *
109 * Fixed offset bug in vm_map_copyout_page_list.
110 * Fixed format errors in vm_map_copy_print.
111 * [92/01/09 15:32:44 jsb]
112 *
113 * Added vm_map_copy_print.
114 * [92/01/08 10:10:53 jsb]
115 *
116 * Revision 2.35 92/02/19 15:45:33 elf
117 * Picked up dlb fix for vm_map_copyin_page_list,
118 * to make the continuation args correctly.
119 * [92/02/19 rpd]
120 *
121 * Revision 2.34 92/02/19 15:09:57 elf
122 * Picked up dlb fix for vm_map_copyin_page_list,
123 * for when vm_fault_page returns memory-error.
124 * [92/02/14 rpd]
125 *
126 * Revision 2.33 92/01/03 20:34:57 dbg
127 * Fix erroneous boundary condition in search for free space in
128 * vm_map_copyout(), vm_map_copyout_page_list(). The 'end' of the
129 * entry is the last used address + 1, and therefore can be <= the
130 * start of the next entry. See vm_map_enter(), where it was done
131 * correctly.
132 * [91/12/23 dbg]
133 *
134 * Add vm_map_copy_copy. It is needed by kernel interface
135 * routines that may still return errors after copying a
136 * copy-object into kernel address space.
137 * [91/12/18 dbg]
138 *
139 * Revision 2.32 91/12/13 13:49:41 jsb
140 * Removed NORMA_ETHER always_steal hack.
141 *
142 * Revision 2.31 91/12/09 19:23:39 rpd
143 * Fixed vm_map_copyout_page_list a la vm_object_coalesce.
144 * Fixed vm_map_copy_overwrite to check for misalignment.
145 * Fixed some log infelicities.
146 * [91/12/09 rpd]
147 *
148 * Revision 2.30 91/12/10 13:26:34 jsb
149 * Simplify page list continuation abort logic.
150 * [91/12/10 12:56:06 jsb]
151 *
152 * Rewrite bogus (abort || src_destroy_only) logic in
153 * vm_map_copyin_page_list_cont.
154 * Change vm_map_convert_to_page_list routines to avoid
155 * leaking object references. Includes a new version of
156 * vm_map_copy_discard for use as a page list continuation.
157 * Hold a map reference in vm_map_copyin_page_list continuations.
158 * Add some checks in vm_map_convert_from_page_list.
159 * [91/12/04 dlb]
160 *
161 * Revision 2.29 91/11/14 17:08:56 rpd
162 * Add ifdef's to always steal code. Not needed (or wanted) except
163 * to workaround a norma ether bug. UGH!
164 * [91/11/14 jeffreyh]
165 *
166 * Add consume on success logic to vm_map_copyout_page_list.
167 * [91/11/12 dlb]
168 *
169 * Revision 2.28 91/11/14 16:56:51 rpd
170 * Made vm_map_convert_*_page_list_* routines look more like
171 * David's vm_map_copy{in,out}_page_list code.
172 * [91/11/00 jsb]
173 *
174 * Revision 2.27 91/10/09 16:20:05 af
175 * Fixed vm_map_copy_page_discard to lock before activating.
176 * Fixed vm_map_copyout_page_list to clear just the busy bit (from dlb).
177 * Fixed vm_map_copy_steal_pages to activate if necessary (from dlb).
178 * [91/10/07 rpd]
179 *
180 * Fixed vm_map_copyout_page_list to clear busy and dirty bits.
181 * [91/10/06 rpd]
182 *
183 * Picked up dlb fix for stealing wired pages in vm_map_copyin_page_list.
184 * [91/09/27 rpd]
185 *
186 * Revision 2.26 91/08/28 11:18:10 jsb
187 * Changed vm_map_copyout to discard the copy object only upon success.
188 * [91/08/03 rpd]
189 * Initialize copy->cpy_cont and copy->cpy_cont args in
190 * vm_map_convert_to_page_list and
191 * vm_map_convert_to_page_list_from_object.
192 * [91/08/16 10:34:53 jsb]
193 *
194 * Optimize stealing wired pages case of vm_map_copyin_page_list.
195 * [91/08/12 17:36:57 dlb]
196 *
197 * Move vm_map_copy_steal pages in this file. Improve comments,
198 * and related cleanup.
199 * [91/08/06 17:22:43 dlb]
200 *
201 * Split page release logic for page lists into separate
202 * routine, vm_map_copy_page_discard. Minor continuation
203 * bug fix.
204 * [91/08/05 17:48:23 dlb]
205 *
206 * Move logic that steals pages by making a new copy into a separate
207 * routine since both vm_map_{copyin,copyout}_page_list may need it.
208 * Also: Previous merge included logic to be a little more careful
209 * about what gets copied when a map entry is duplicated.
210 * [91/07/31 15:15:19 dlb]
211 *
212 * Implement vm_map_copy continuation for page lists.
213 * Implement in transition map entries needed by the above.
214 * [91/07/30 14:16:40 dlb]
215 *
216 * New and improved version of vm_map_copyin_page_list:
217 * Clean up error handling (especially vm_fault_page returns).
218 * Avoid holding map locks across faults and page copies.
219 * Move page stealing code to separate loop to deal with
220 * pagein errors from vm_fault_page.
221 * Add support for stealing wired pages (allows page stealing on
222 * pagein from default pager).
223 * [91/07/03 14:15:39 dlb]
224 * Restored definition of vm_map_convert_from_page_list.
225 * Added definition of vm_map_convert_to_page_list_from_object.
226 * Added call to vm_map_convert_from_page_list to vm_map_copy_overwrite.
227 * Added include of kern/assert.h.
228 * [91/08/15 13:20:13 jsb]
229 *
230 * Revision 2.25 91/08/03 18:19:58 jsb
231 * Removed vm_map_convert_from_page_list.
232 * Temporarily make vm_map_copyin_page_list always steal pages.
233 * [91/08/01 22:49:17 jsb]
234 *
235 * NORMA_IPC: Added vm_map_convert_{to,from}_page_list functions.
236 * These will be removed when all kernel interfaces
237 * understand page_list copy objects.
238 * [91/07/04 14:00:24 jsb]
239 *
240 * Removed obsoleted NORMA_IPC functions:
241 * ipc_clport_copyin_object
242 * vm_map_copyout_page
243 * ipc_clport_copyin_pagelist
244 * [91/07/04 13:20:09 jsb]
245 *
246 * Revision 2.24 91/07/01 08:27:22 jsb
247 * 20-Jun-91 David L. Black (dlb) at Open Software Foundation
248 * Add support for page list format map copies. NORMA/CLPORT code
249 * will be cut over later.
250 *
251 * 18-Jun-91 David L. Black (dlb) at Open Software Foundation
252 * Convert to use multiple format map copies.
253 * [91/06/29 16:37:03 jsb]
254 *
255 * Revision 2.23 91/06/25 10:33:33 rpd
256 * Changed mach_port_t to ipc_port_t where appropriate.
257 * [91/05/28 rpd]
258 *
259 * Revision 2.22 91/06/17 15:49:02 jsb
260 * Renamed NORMA conditionals.
261 * [91/06/17 11:11:13 jsb]
262 *
263 * Revision 2.21 91/06/06 17:08:22 jsb
264 * NORMA_IPC support (first cut):
265 * Work with page lists instead of copy objects.
266 * Make coalescing more useful.
267 * [91/05/14 09:34:41 jsb]
268 *
269 * Revision 2.20 91/05/18 14:40:53 rpd
270 * Restored mask argument to vm_map_find_entry.
271 * [91/05/02 rpd]
272 * Removed ZALLOC and ZFREE.
273 * [91/03/31 rpd]
274 *
275 * Revised vm_map_find_entry to allow coalescing of entries.
276 * [91/03/28 rpd]
277 * Removed map_data. Moved kentry_data here.
278 * [91/03/22 rpd]
279 *
280 * Revision 2.19 91/05/14 17:49:38 mrt
281 * Correcting copyright
282 *
283 * Revision 2.18 91/03/16 15:05:42 rpd
284 * Fixed vm_map_pmap_enter to activate loose pages after PMAP_ENTER.
285 * [91/03/11 rpd]
286 * Removed vm_map_find.
287 * [91/03/03 rpd]
288 * Fixed vm_map_entry_delete's use of vm_object_page_remove,
289 * following dlb's report.
290 * [91/01/26 rpd]
291 * Picked up dlb's fix for vm_map_fork/VM_INHERIT_COPY of wired entries.
292 * [91/01/12 rpd]
293 *
294 * Revision 2.17 91/02/05 17:58:43 mrt
295 * Changed to new Mach copyright
296 * [91/02/01 16:32:45 mrt]
297 *
298 * Revision 2.16 91/01/08 16:45:08 rpd
299 * Added continuation argument to thread_block.
300 * [90/12/08 rpd]
301 *
302 * Revision 2.15 90/11/05 14:34:26 rpd
303 * Removed vm_region_old_behavior.
304 * [90/11/02 rpd]
305 *
306 * Revision 2.14 90/10/25 14:50:18 rwd
307 * Fixed bug in vm_map_enter that was introduced in 2.13.
308 * [90/10/21 rpd]
309 *
310 * Revision 2.13 90/10/12 13:05:48 rpd
311 * Removed copy_on_write field.
312 * [90/10/08 rpd]
313 *
314 * Revision 2.12 90/08/06 15:08:31 rwd
315 * Fixed several bugs in the overwriting-permanent-memory case of
316 * vm_map_copy_overwrite, including an object reference leak.
317 * [90/07/26 rpd]
318 *
319 * Revision 2.11 90/06/19 23:02:09 rpd
320 * Picked up vm_submap_object, vm_map_fork share-map revisions,
321 * including Bohman's bug fix.
322 * [90/06/08 rpd]
323 *
324 * Fixed vm_region so that it doesn't treat sub-map entries (only
325 * found in the kernel map) as regular entries. Instead, it just
326 * ignores them and doesn't try to send back an object_name reference.
327 * [90/03/23 gk]
328 *
329 * Revision 2.10 90/06/02 15:10:57 rpd
330 * Moved vm_mapped_pages_info to vm/vm_debug.c.
331 * [90/05/31 rpd]
332 *
333 * In vm_map_copyin, if length is zero allow any source address.
334 * [90/04/23 rpd]
335 *
336 * Correct share/sub map confusion in vm_map_copy_overwrite.
337 * [90/04/22 rpd]
338 *
339 * In vm_map_copyout, make the current protection be VM_PROT_DEFAULT
340 * and the inheritance be VM_INHERIT_DEFAULT.
341 * [90/04/18 rpd]
342 *
343 * Removed some extraneous code from vm_map_copyin/vm_map_copyout.
344 * [90/03/28 rpd]
345 * Updated to new vm_map_pageable, with user_wired_count.
346 * Several bug fixes for vm_map_copy_overwrite.
347 * Added vm_map_copyin_object.
348 * [90/03/26 23:14:56 rpd]
349 *
350 * Revision 2.9 90/05/29 18:38:46 rwd
351 * Add flag to turn off forced pmap_enters in vm_map call.
352 * [90/05/12 rwd]
353 * Bug fix from rpd for OOL data to VM_PROT_DEFAULT. New
354 * vm_map_pmap_enter from rfr to preemtively enter pages on vm_map
355 * calls.
356 * [90/04/20 rwd]
357 *
358 * Revision 2.8 90/05/03 15:52:42 dbg
359 * Fix vm_map_copyout to set current protection of new entries to
360 * VM_PROT_DEFAULT, to match vm_allocate.
361 * [90/04/12 dbg]
362 *
363 * Add vm_mapped_pages_info under switch MACH_DEBUG.
364 * [90/04/06 dbg]
365 *
366 * Revision 2.7 90/02/22 20:05:52 dbg
367 * Combine fields in vm_map and vm_map_copy into a vm_map_header
368 * structure. Fix macros dealing with vm_map_t and vm_map_copy_t
369 * to operate on the header, so that most of the code they use can
370 * move back into the associated functions (to reduce space).
371 * [90/01/29 dbg]
372 *
373 * Add missing code to copy map entries from pageable to
374 * non-pageable zone in vm_map_copyout. Try to avoid
375 * vm_object_copy in vm_map_copyin if source will be
376 * destroyed. Fix vm_map_copy_overwrite to correctly
377 * check for gaps in destination when destination is
378 * temporary.
379 * [90/01/26 dbg]
380 *
381 * Add keep_wired parameter to vm_map_copyin.
382 * Remove vm_map_check_protection and vm_map_insert (not used).
383 * Rewrite vm_map_find to call vm_map_enter - should fix all
384 * callers instead.
385 * [90/01/25 dbg]
386 *
387 * Add changes from mainline:
388 *
389 * Fixed syntax errors in vm_map_print.
390 * Fixed use of vm_object_copy_slowly in vm_map_copyin.
391 * Restored similar fix to vm_map_copy_entry.
392 * [89/12/01 13:56:30 rpd]
393 * Make sure object lock is held before calling
394 * vm_object_copy_slowly. Release old destination object in wired
395 * case of vm_map_copy_entry. Fixes from rpd.
396 * [89/12/15 dlb]
397 *
398 * Modify vm_map_pageable to create new objects BEFORE clipping
399 * map entries to avoid object proliferation.
400 * [88/11/30 dlb]
401 *
402 * Check for holes when wiring memory in vm_map_pageable.
403 * Pass requested access type to vm_map_pageable and check it.
404 * [88/11/21 dlb]
405 *
406 * Handle overwriting permanent objects in vm_map_copy_overwrite().
407 *
408 * Put optimized copy path in vm_map_fork().
409 * [89/10/01 23:24:32 mwyoung]
410 *
411 * Integrate the "wait for space" option for kernel maps
412 * into this module.
413 *
414 * Add vm_map_copyin(), vm_map_copyout(), vm_map_copy_discard() to
415 * perform map copies.
416 *
417 * Convert vm_map_entry_create(), vm_map_clip_{start,end} so that
418 * they may be used with either a vm_map_t or a vm_map_copy_t.
419 *
420 * Use vme_next, vme_prev, vme_start, vme_end, vm_map_to_entry.
421 * [89/08/31 21:12:23 rpd]
422 *
423 * Picked up NeXT change to vm_region: now if you give it an
424 * address in the middle of an entry, it will use the start of
425 * the entry.
426 * [89/08/20 23:19:39 rpd]
427 *
428 * A bug fix from NeXT: vm_map_protect wasn't unlocking in the
429 * is_sub_map case. Also, fixed vm_map_copy_entry to not take
430 * the address of needs_copy, because it is a bit-field now.
431 * [89/08/19 23:43:55 rpd]
432 *
433 * Revision 2.6 90/01/22 23:09:20 af
434 * Added vm_map_machine_attributes().
435 * [90/01/20 17:27:12 af]
436 *
437 * Revision 2.5 90/01/19 14:36:05 rwd
438 * Enter wired pages in destination pmap in vm_move_entry_range, to
439 * correctly implement wiring semantics.
440 * [90/01/16 dbg]
441 *
442 * Revision 2.4 89/11/29 14:18:19 af
443 * Redefine VM_PROT_DEFAULT locally for mips.
444 *
445 * Revision 2.3 89/09/08 11:28:29 dbg
446 * Add hack to avoid deadlocking while wiring kernel memory.
447 * [89/08/31 dbg]
448 *
449 * Merged with [UNDOCUMENTED!] changes from rfr.
450 * [89/08/15 dbg]
451 *
452 * Clip source map entry in vm_move_entry_range, per RFR. Marking
453 * the entire data section copy-on-write is costing more than the
454 * clips (or not being able to collapse the object) ever would.
455 * [89/07/24 dbg]
456 *
457 * Add keep_wired parameter to vm_map_move, to wire destination if
458 * source is wired.
459 * [89/07/14 dbg]
460 *
461 * Revision 2.2 89/08/11 17:57:01 rwd
462 * Changes for MACH_KERNEL:
463 * . Break out the inner loop of vm_map_enter, so that
464 * kmem_alloc can use it.
465 * . Add vm_map_move as special case of vm_allocate/vm_map_copy.
466 * [89/04/28 dbg]
467 *
468 * Revision 2.11 89/04/18 21:25:58 mwyoung
469 * Recent history [mwyoung]:
470 * Add vm_map_simplify() to keep kernel maps more compact.
471 * Condensed history:
472 * Add vm_map_enter(). [mwyoung]
473 * Return a "version" from vm_map_lookup() to simplify
474 * locking. [mwyoung]
475 * Get pageability changes right. [dbg, dlb]
476 * Original implementation. [avie, mwyoung, dbg]
477 *
478 */
479 /*
480 * File: vm/vm_map.c
481 * Author: Avadis Tevanian, Jr., Michael Wayne Young
482 * Date: 1985
483 *
484 * Virtual memory mapping module.
485 */
486
487 #include <norma_ipc.h>
488
489 #include <mach/kern_return.h>
490 #include <mach/port.h>
491 #include <mach/vm_attributes.h>
492 #include <mach/vm_param.h>
493 #include <kern/assert.h>
494 #include <kern/zalloc.h>
495 #include <vm/vm_fault.h>
496 #include <vm/vm_map.h>
497 #include <vm/vm_object.h>
498 #include <vm/vm_page.h>
499 #include <vm/vm_kern.h>
500 #include <ipc/ipc_port.h>
501
502
503 /*
504 * Macros to copy a vm_map_entry. We must be careful to correctly
505 * manage the wired page count. vm_map_entry_copy() creates a new
506 * map entry to the same memory - the wired count in the new entry
507 * must be set to zero. vm_map_entry_copy_full() creates a new
508 * entry that is identical to the old entry. This preserves the
509 * wire count; it's used for map splitting and zone changing in
510 * vm_map_copyout.
511 */
512 #define vm_map_entry_copy(NEW,OLD) \
513 MACRO_BEGIN \
514 *(NEW) = *(OLD); \
515 (NEW)->is_shared = FALSE; \
516 (NEW)->needs_wakeup = FALSE; \
517 (NEW)->in_transition = FALSE; \
518 (NEW)->wired_count = 0; \
519 (NEW)->user_wired_count = 0; \
520 MACRO_END
521
522 #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
523
524 /*
525 * Virtual memory maps provide for the mapping, protection,
526 * and sharing of virtual memory objects. In addition,
527 * this module provides for an efficient virtual copy of
528 * memory from one map to another.
529 *
530 * Synchronization is required prior to most operations.
531 *
532 * Maps consist of an ordered doubly-linked list of simple
533 * entries; a single hint is used to speed up lookups.
534 *
535 * Sharing maps have been deleted from this version of Mach.
536 * All shared objects are now mapped directly into the respective
537 * maps. This requires a change in the copy on write strategy;
538 * the asymmetric (delayed) strategy is used for shared temporary
539 * objects instead of the symmetric (shadow) strategy. This is
540 * selected by the (new) use_shared_copy bit in the object. See
541 * vm_object_copy_temporary in vm_object.c for details. All maps
542 * are now "top level" maps (either task map, kernel map or submap
543 * of the kernel map).
544 *
545 	 *	Since portions of maps are specified by start/end addresses,
546 * which may not align with existing map entries, all
547 * routines merely "clip" entries to these start/end values.
548 * [That is, an entry is split into two, bordering at a
549 * start or end value.] Note that these clippings may not
550 * always be necessary (as the two resulting entries are then
551 * not changed); however, the clipping is done for convenience.
552 * No attempt is currently made to "glue back together" two
553 * abutting entries.
554 *
555 * The symmetric (shadow) copy strategy implements virtual copy
556 * by copying VM object references from one map to
557 * another, and then marking both regions as copy-on-write.
558 * It is important to note that only one writeable reference
559 * to a VM object region exists in any map when this strategy
560 * is used -- this means that shadow object creation can be
561 	 *	delayed until a write operation occurs.  The asymmetric (delayed)
562 * strategy allows multiple maps to have writeable references to
563 * the same region of a vm object, and hence cannot delay creating
564 * its copy objects. See vm_object_copy_temporary() in vm_object.c.
565 * Copying of permanent objects is completely different; see
566 * vm_object_copy_strategically() in vm_object.c.
567 */
568
569 zone_t vm_map_zone; /* zone for vm_map structures */
570 zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
571 zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
572 zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
573
574 boolean_t vm_map_lookup_entry(); /* forward declaration */
575
576 /*
577 * Placeholder object for submap operations. This object is dropped
578 * into the range by a call to vm_map_find, and removed when
579 * vm_map_submap creates the submap.
580 */
581
582 vm_object_t vm_submap_object;
583
584 /*
585 * vm_map_init:
586 *
587 * Initialize the vm_map module. Must be called before
588 * any other vm_map routines.
589 *
590 * Map and entry structures are allocated from zones -- we must
591 * initialize those zones.
592 *
593 * There are three zones of interest:
594 *
595 * vm_map_zone: used to allocate maps.
596 * vm_map_entry_zone: used to allocate map entries.
597 * vm_map_kentry_zone: used to allocate map entries for the kernel.
598 *
599 * The kernel allocates map entries from a special zone that is initially
600 * "crammed" with memory. It would be difficult (perhaps impossible) for
601 	 *	the kernel to allocate more memory to an entry zone when it became
602 	 *	empty since the very act of allocating memory implies the creation
603 * of a new entry.
604 */
605
606 vm_offset_t kentry_data;
607 vm_size_t kentry_data_size;
608 int kentry_count = 256; /* to init kentry_data_size */
609
610 void vm_map_init()
611 {
612 vm_map_zone = zinit((vm_size_t) sizeof(struct vm_map), 40*1024,
613 PAGE_SIZE, FALSE, "maps");
614 vm_map_entry_zone = zinit((vm_size_t) sizeof(struct vm_map_entry),
615 1024*1024, PAGE_SIZE*5,
616 FALSE, "non-kernel map entries");
617 vm_map_kentry_zone = zinit((vm_size_t) sizeof(struct vm_map_entry),
618 kentry_data_size, kentry_data_size,
619 FALSE, "kernel map entries");
620
621 vm_map_copy_zone = zinit((vm_size_t) sizeof(struct vm_map_copy),
622 16*1024, PAGE_SIZE, FALSE,
623 "map copies");
624
625 /*
626 * Cram the kentry zone with initial data.
627 */
628 zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
629
630 /*
631 * Submap object is initialized by vm_object_init.
632 */
633 }
634
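/*
 *	A minimal sketch of the bootstrap step implied above.  Before
 *	vm_map_init() can cram the kentry zone, startup code has to size
 *	kentry_data_size from kentry_count and reserve the backing pages.
 *	The pmap_steal_memory() bootstrap allocator and the function name
 *	below are assumptions for this sketch; only kentry_data,
 *	kentry_data_size and kentry_count come from this file.
 */
void vm_map_bootstrap_kentries()
{
	kentry_data_size =
		round_page(kentry_count * sizeof(struct vm_map_entry));
	kentry_data = pmap_steal_memory(kentry_data_size);	/* assumed allocator */
}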
635 /*
636 * vm_map_create:
637 *
638 * Creates and returns a new empty VM map with
639 * the given physical map structure, and having
640 * the given lower and upper address bounds.
641 */
642 vm_map_t vm_map_create(pmap, min, max, pageable)
643 pmap_t pmap;
644 vm_offset_t min, max;
645 boolean_t pageable;
646 {
647 register vm_map_t result;
648
649 result = (vm_map_t) zalloc(vm_map_zone);
650 if (result == VM_MAP_NULL)
651 panic("vm_map_create");
652
653 vm_map_first_entry(result) = vm_map_to_entry(result);
654 vm_map_last_entry(result) = vm_map_to_entry(result);
655 result->hdr.nentries = 0;
656 result->hdr.entries_pageable = pageable;
657
658 result->size = 0;
659 result->ref_count = 1;
660 result->pmap = pmap;
661 result->min_offset = min;
662 result->max_offset = max;
663 result->wiring_required = FALSE;
664 result->wait_for_space = FALSE;
665 result->first_free = vm_map_to_entry(result);
666 result->hint = vm_map_to_entry(result);
667 vm_map_lock_init(result);
668 simple_lock_init(&result->ref_lock);
669 simple_lock_init(&result->hint_lock);
670
671 return(result);
672 }
673
674 /*
675 * vm_map_entry_create: [ internal use only ]
676 *
677 * Allocates a VM map entry for insertion in the
678 * given map (or map copy). No fields are filled.
679 */
680 #define vm_map_entry_create(map) \
681 _vm_map_entry_create(&(map)->hdr)
682
683 #define vm_map_copy_entry_create(copy) \
684 _vm_map_entry_create(&(copy)->cpy_hdr)
685
686 vm_map_entry_t _vm_map_entry_create(map_header)
687 register struct vm_map_header *map_header;
688 {
689 register zone_t zone;
690 register vm_map_entry_t entry;
691
692 if (map_header->entries_pageable)
693 zone = vm_map_entry_zone;
694 else
695 zone = vm_map_kentry_zone;
696
697 entry = (vm_map_entry_t) zalloc(zone);
698 if (entry == VM_MAP_ENTRY_NULL)
699 panic("vm_map_entry_create");
700
701 return(entry);
702 }
703
704 /*
705 * vm_map_entry_dispose: [ internal use only ]
706 *
707 * Inverse of vm_map_entry_create.
708 */
709 #define vm_map_entry_dispose(map, entry) \
710 _vm_map_entry_dispose(&(map)->hdr, (entry))
711
712 #define	vm_map_copy_entry_dispose(copy, entry) \
713 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
714
715 void _vm_map_entry_dispose(map_header, entry)
716 register struct vm_map_header *map_header;
717 register vm_map_entry_t entry;
718 {
719 register zone_t zone;
720
721 if (map_header->entries_pageable)
722 zone = vm_map_entry_zone;
723 else
724 zone = vm_map_kentry_zone;
725
726 zfree(zone, (vm_offset_t) entry);
727 }
728
729 /*
730 * vm_map_entry_{un,}link:
731 *
732 * Insert/remove entries from maps (or map copies).
733 */
734 #define vm_map_entry_link(map, after_where, entry) \
735 _vm_map_entry_link(&(map)->hdr, after_where, entry)
736
737 #define vm_map_copy_entry_link(copy, after_where, entry) \
738 _vm_map_entry_link(&(copy)->cpy_hdr, after_where, entry)
739
740 #define _vm_map_entry_link(hdr, after_where, entry) \
741 MACRO_BEGIN \
742 (hdr)->nentries++; \
743 (entry)->vme_prev = (after_where); \
744 (entry)->vme_next = (after_where)->vme_next; \
745 (entry)->vme_prev->vme_next = \
746 (entry)->vme_next->vme_prev = (entry); \
747 MACRO_END
748
749 #define vm_map_entry_unlink(map, entry) \
750 _vm_map_entry_unlink(&(map)->hdr, entry)
751
752 #define vm_map_copy_entry_unlink(copy, entry) \
753 _vm_map_entry_unlink(&(copy)->cpy_hdr, entry)
754
755 #define _vm_map_entry_unlink(hdr, entry) \
756 MACRO_BEGIN \
757 (hdr)->nentries--; \
758 (entry)->vme_next->vme_prev = (entry)->vme_prev; \
759 (entry)->vme_prev->vme_next = (entry)->vme_next; \
760 MACRO_END
761
762 /*
763 * vm_map_reference:
764 *
765 * Creates another valid reference to the given map.
766 *
767 */
768 void vm_map_reference(map)
769 register vm_map_t map;
770 {
771 if (map == VM_MAP_NULL)
772 return;
773
774 simple_lock(&map->ref_lock);
775 map->ref_count++;
776 simple_unlock(&map->ref_lock);
777 }
778
779 /*
780 * vm_map_deallocate:
781 *
782 * Removes a reference from the specified map,
783 * destroying it if no references remain.
784 * The map should not be locked.
785 */
786 void vm_map_deallocate(map)
787 register vm_map_t map;
788 {
789 register int c;
790
791 if (map == VM_MAP_NULL)
792 return;
793
794 simple_lock(&map->ref_lock);
795 c = --map->ref_count;
796 simple_unlock(&map->ref_lock);
797
798 if (c > 0) {
799 return;
800 }
801
802 projected_buffer_collect(map);
803 (void) vm_map_delete(map, map->min_offset, map->max_offset);
804
805 pmap_destroy(map->pmap);
806
807 zfree(vm_map_zone, (vm_offset_t) map);
808 }
809
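/*
 *	A hedged sketch of the reference-counting discipline described
 *	above: each holder of a map takes its own reference with
 *	vm_map_reference() and gives it back with vm_map_deallocate();
 *	only when the last reference is dropped are the entries and the
 *	pmap destroyed.  The function name is illustrative.
 */
void example_map_use(map)
	vm_map_t	map;
{
	vm_map_reference(map);		/* keep the map alive while we use it */
	/* ... operate on the map ... */
	vm_map_deallocate(map);		/* last reference gone: entries deleted,
					   pmap destroyed */
}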
810 /*
811 * SAVE_HINT:
812 *
813 * Saves the specified entry as the hint for
814 * future lookups. Performs necessary interlocks.
815 */
816 #define SAVE_HINT(map,value) \
817 simple_lock(&(map)->hint_lock); \
818 (map)->hint = (value); \
819 simple_unlock(&(map)->hint_lock);
820
821 /*
822 * vm_map_lookup_entry: [ internal use only ]
823 *
824 * Finds the map entry containing (or
825 * immediately preceding) the specified address
826 * in the given map; the entry is returned
827 * in the "entry" parameter. The boolean
828 * result indicates whether the address is
829 * actually contained in the map.
830 */
831 boolean_t vm_map_lookup_entry(map, address, entry)
832 register vm_map_t map;
833 register vm_offset_t address;
834 vm_map_entry_t *entry; /* OUT */
835 {
836 register vm_map_entry_t cur;
837 register vm_map_entry_t last;
838
839 /*
840 * Start looking either from the head of the
841 * list, or from the hint.
842 */
843
844 simple_lock(&map->hint_lock);
845 cur = map->hint;
846 simple_unlock(&map->hint_lock);
847
848 if (cur == vm_map_to_entry(map))
849 cur = cur->vme_next;
850
851 if (address >= cur->vme_start) {
852 /*
853 * Go from hint to end of list.
854 *
855 * But first, make a quick check to see if
856 * we are already looking at the entry we
857 * want (which is usually the case).
858 * Note also that we don't need to save the hint
859 * here... it is the same hint (unless we are
860 * at the header, in which case the hint didn't
861 * buy us anything anyway).
862 */
863 last = vm_map_to_entry(map);
864 if ((cur != last) && (cur->vme_end > address)) {
865 *entry = cur;
866 return(TRUE);
867 }
868 }
869 else {
870 /*
871 * Go from start to hint, *inclusively*
872 */
873 last = cur->vme_next;
874 cur = vm_map_first_entry(map);
875 }
876
877 /*
878 * Search linearly
879 */
880
881 while (cur != last) {
882 if (cur->vme_end > address) {
883 if (address >= cur->vme_start) {
884 /*
885 * Save this lookup for future
886 * hints, and return
887 */
888
889 *entry = cur;
890 SAVE_HINT(map, cur);
891 return(TRUE);
892 }
893 break;
894 }
895 cur = cur->vme_next;
896 }
897 *entry = cur->vme_prev;
898 SAVE_HINT(map, *entry);
899 return(FALSE);
900 }
901
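/*
 *	A usage sketch for vm_map_lookup_entry(), following the pattern the
 *	routines later in this file use: on TRUE the returned entry contains
 *	the address and is typically clipped at "start"; on FALSE the entry
 *	is the one immediately preceding the hole, so a caller begins with
 *	its successor.  The map must be locked by the caller.  The function
 *	name and range arguments are illustrative.
 */
void example_range_walk(map, start, end)
	vm_map_t	map;
	vm_offset_t	start, end;
{
	vm_map_entry_t	entry;

	if (vm_map_lookup_entry(map, start, &entry)) {
		/* "start" lies inside this entry */
		vm_map_clip_start(map, entry, start);
	}
	else {
		/* "start" falls in a hole; begin with the next real entry */
		entry = entry->vme_next;
	}

	while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
		vm_map_clip_end(map, entry, end);
		/* ... operate on [entry->vme_start, entry->vme_end) ... */
		entry = entry->vme_next;
	}
}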
902 /*
903 * Routine: invalid_user_access
904 *
905  *	Returns TRUE if the requested user access to the range is not valid.
906 */
907
908 boolean_t
909 invalid_user_access(map, start, end, prot)
910 vm_map_t map;
911 vm_offset_t start, end;
912 vm_prot_t prot;
913 {
914 vm_map_entry_t entry;
915
916 return (map == VM_MAP_NULL || map == kernel_map ||
917 !vm_map_lookup_entry(map, start, &entry) ||
918 entry->vme_end < end ||
919 (prot & ~(entry->protection)));
920 }
921
922
923 /*
924 * Routine: vm_map_find_entry
925 * Purpose:
926 * Allocate a range in the specified virtual address map,
927 * returning the entry allocated for that range.
928 * Used by kmem_alloc, etc. Returns wired entries.
929 *
930 * The map must be locked.
931 *
932 * If an entry is allocated, the object/offset fields
933 * are initialized to zero. If an object is supplied,
934 * then an existing entry may be extended.
935 */
936 kern_return_t vm_map_find_entry(map, address, size, mask, object, o_entry)
937 register vm_map_t map;
938 vm_offset_t *address; /* OUT */
939 vm_size_t size;
940 vm_offset_t mask;
941 vm_object_t object;
942 vm_map_entry_t *o_entry; /* OUT */
943 {
944 register vm_map_entry_t entry, new_entry;
945 register vm_offset_t start;
946 register vm_offset_t end;
947
948 /*
949 * Look for the first possible address;
950 * if there's already something at this
951 * address, we have to start after it.
952 */
953
954 if ((entry = map->first_free) == vm_map_to_entry(map))
955 start = map->min_offset;
956 else
957 start = entry->vme_end;
958
959 /*
960 * In any case, the "entry" always precedes
961 * the proposed new region throughout the loop:
962 */
963
964 while (TRUE) {
965 register vm_map_entry_t next;
966
967 /*
968 * Find the end of the proposed new region.
969 * Be sure we didn't go beyond the end, or
970 * wrap around the address.
971 */
972
973 start = ((start + mask) & ~mask);
974 end = start + size;
975
976 if ((end > map->max_offset) || (end < start))
977 return(KERN_NO_SPACE);
978
979 /*
980 * If there are no more entries, we must win.
981 */
982
983 next = entry->vme_next;
984 if (next == vm_map_to_entry(map))
985 break;
986
987 /*
988 * If there is another entry, it must be
989 * after the end of the potential new region.
990 */
991
992 if (next->vme_start >= end)
993 break;
994
995 /*
996 * Didn't fit -- move to the next entry.
997 */
998
999 entry = next;
1000 start = entry->vme_end;
1001 }
1002
1003 /*
1004 * At this point,
1005 * "start" and "end" should define the endpoints of the
1006 * available new range, and
1007 * "entry" should refer to the region before the new
1008 * range, and
1009 *
1010 * the map should be locked.
1011 */
1012
1013 *address = start;
1014
1015 /*
1016 * See whether we can avoid creating a new entry by
1017 * extending one of our neighbors. [So far, we only attempt to
1018 * extend from below.]
1019 */
1020
1021 if ((object != VM_OBJECT_NULL) &&
1022 (entry != vm_map_to_entry(map)) &&
1023 (entry->vme_end == start) &&
1024 (!entry->is_shared) &&
1025 (!entry->is_sub_map) &&
1026 (entry->object.vm_object == object) &&
1027 (entry->needs_copy == FALSE) &&
1028 (entry->inheritance == VM_INHERIT_DEFAULT) &&
1029 (entry->protection == VM_PROT_DEFAULT) &&
1030 (entry->max_protection == VM_PROT_ALL) &&
1031 (entry->wired_count == 1) &&
1032 (entry->user_wired_count == 0) &&
1033 (entry->projected_on == 0)) {
1034 /*
1035 * Because this is a special case,
1036 * we don't need to use vm_object_coalesce.
1037 */
1038
1039 entry->vme_end = end;
1040 new_entry = entry;
1041 } else {
1042 new_entry = vm_map_entry_create(map);
1043
1044 new_entry->vme_start = start;
1045 new_entry->vme_end = end;
1046
1047 new_entry->is_shared = FALSE;
1048 new_entry->is_sub_map = FALSE;
1049 new_entry->object.vm_object = VM_OBJECT_NULL;
1050 new_entry->offset = (vm_offset_t) 0;
1051
1052 new_entry->needs_copy = FALSE;
1053
1054 new_entry->inheritance = VM_INHERIT_DEFAULT;
1055 new_entry->protection = VM_PROT_DEFAULT;
1056 new_entry->max_protection = VM_PROT_ALL;
1057 new_entry->wired_count = 1;
1058 new_entry->user_wired_count = 0;
1059
1060 new_entry->in_transition = FALSE;
1061 new_entry->needs_wakeup = FALSE;
1062 new_entry->projected_on = 0;
1063
1064 /*
1065 * Insert the new entry into the list
1066 */
1067
1068 vm_map_entry_link(map, entry, new_entry);
1069 }
1070
1071 map->size += size;
1072
1073 /*
1074 * Update the free space hint and the lookup hint
1075 */
1076
1077 map->first_free = new_entry;
1078 SAVE_HINT(map, new_entry);
1079
1080 *o_entry = new_entry;
1081 return(KERN_SUCCESS);
1082 }
1083
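/*
 *	A hedged sketch of a kmem_alloc-style caller of vm_map_find_entry():
 *	with the map locked, find space for a wired range, then supply the
 *	object/offset that the routine leaves zeroed.  Real kernel callers
 *	go on to allocate and enter pages for the wired range; that part is
 *	omitted here, and the function name is illustrative.
 */
kern_return_t example_kernel_alloc(map, size, addrp)
	vm_map_t	map;
	vm_size_t	size;
	vm_offset_t	*addrp;		/* OUT: start of the new range */
{
	vm_map_entry_t	entry;
	kern_return_t	kr;

	vm_map_lock(map);
	kr = vm_map_find_entry(map, addrp, size, (vm_offset_t) 0,
			       VM_OBJECT_NULL, &entry);
	if (kr == KERN_SUCCESS) {
		/* Fresh entry with a null object: give it backing memory. */
		entry->object.vm_object = vm_object_allocate(size);
		entry->offset = (vm_offset_t) 0;
	}
	vm_map_unlock(map);
	return kr;
}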
1084 int vm_map_pmap_enter_print = FALSE;
1085 int vm_map_pmap_enter_enable = FALSE;
1086
1087 /*
1088 * Routine: vm_map_pmap_enter
1089 *
1090 * Description:
1091 * Force pages from the specified object to be entered into
1092 * the pmap at the specified address if they are present.
1093  *		As soon as a page is not found in the object, the scan ends.
1094 *
1095 * Returns:
1096 * Nothing.
1097 *
1098 * In/out conditions:
1099 * The source map should not be locked on entry.
1100 */
1101 void
1102 vm_map_pmap_enter(map, addr, end_addr, object, offset, protection)
1103 vm_map_t map;
1104 register
1105 vm_offset_t addr;
1106 register
1107 vm_offset_t end_addr;
1108 register
1109 vm_object_t object;
1110 vm_offset_t offset;
1111 vm_prot_t protection;
1112 {
1113 while (addr < end_addr) {
1114 register vm_page_t m;
1115
1116 vm_object_lock(object);
1117 vm_object_paging_begin(object);
1118
1119 m = vm_page_lookup(object, offset);
1120 if (m == VM_PAGE_NULL || m->absent) {
1121 vm_object_paging_end(object);
1122 vm_object_unlock(object);
1123 return;
1124 }
1125
1126 if (vm_map_pmap_enter_print) {
1127 printf("vm_map_pmap_enter:");
1128 printf("map: %x, addr: %x, object: %x, offset: %x\n",
1129 map, addr, object, offset);
1130 }
1131
1132 m->busy = TRUE;
1133 vm_object_unlock(object);
1134
1135 PMAP_ENTER(map->pmap, addr, m,
1136 protection, FALSE);
1137
1138 vm_object_lock(object);
1139 PAGE_WAKEUP_DONE(m);
1140 vm_page_lock_queues();
1141 if (!m->active && !m->inactive)
1142 vm_page_activate(m);
1143 vm_page_unlock_queues();
1144 vm_object_paging_end(object);
1145 vm_object_unlock(object);
1146
1147 offset += PAGE_SIZE;
1148 addr += PAGE_SIZE;
1149 }
1150 }
1151
1152 /*
1153 * Routine: vm_map_enter
1154 *
1155 * Description:
1156 * Allocate a range in the specified virtual address map.
1157 * The resulting range will refer to memory defined by
1158 * the given memory object and offset into that object.
1159 *
1160 * Arguments are as defined in the vm_map call.
1161 */
1162 kern_return_t vm_map_enter(
1163 map,
1164 address, size, mask, anywhere,
1165 object, offset, needs_copy,
1166 cur_protection, max_protection, inheritance)
1167 register
1168 vm_map_t map;
1169 vm_offset_t *address; /* IN/OUT */
1170 vm_size_t size;
1171 vm_offset_t mask;
1172 boolean_t anywhere;
1173 vm_object_t object;
1174 vm_offset_t offset;
1175 boolean_t needs_copy;
1176 vm_prot_t cur_protection;
1177 vm_prot_t max_protection;
1178 vm_inherit_t inheritance;
1179 {
1180 register vm_map_entry_t entry;
1181 register vm_offset_t start;
1182 register vm_offset_t end;
1183 kern_return_t result = KERN_SUCCESS;
1184
1185 #define RETURN(value) { result = value; goto BailOut; }
1186
1187 StartAgain: ;
1188
1189 start = *address;
1190
1191 if (anywhere) {
1192 vm_map_lock(map);
1193
1194 /*
1195 * Calculate the first possible address.
1196 */
1197
1198 if (start < map->min_offset)
1199 start = map->min_offset;
1200 if (start > map->max_offset)
1201 RETURN(KERN_NO_SPACE);
1202
1203 /*
1204 * Look for the first possible address;
1205 * if there's already something at this
1206 * address, we have to start after it.
1207 */
1208
1209 if (start == map->min_offset) {
1210 if ((entry = map->first_free) != vm_map_to_entry(map))
1211 start = entry->vme_end;
1212 } else {
1213 vm_map_entry_t tmp_entry;
1214 if (vm_map_lookup_entry(map, start, &tmp_entry))
1215 start = tmp_entry->vme_end;
1216 entry = tmp_entry;
1217 }
1218
1219 /*
1220 * In any case, the "entry" always precedes
1221 * the proposed new region throughout the
1222 * loop:
1223 */
1224
1225 while (TRUE) {
1226 register vm_map_entry_t next;
1227
1228 /*
1229 * Find the end of the proposed new region.
1230 * Be sure we didn't go beyond the end, or
1231 * wrap around the address.
1232 */
1233
1234 start = ((start + mask) & ~mask);
1235 end = start + size;
1236
1237 if ((end > map->max_offset) || (end < start)) {
1238 if (map->wait_for_space) {
1239 if (size <= (map->max_offset -
1240 map->min_offset)) {
1241 assert_wait((event_t) map, TRUE);
1242 vm_map_unlock(map);
1243 thread_block((void (*)()) 0);
1244 goto StartAgain;
1245 }
1246 }
1247
1248 RETURN(KERN_NO_SPACE);
1249 }
1250
1251 /*
1252 * If there are no more entries, we must win.
1253 */
1254
1255 next = entry->vme_next;
1256 if (next == vm_map_to_entry(map))
1257 break;
1258
1259 /*
1260 * If there is another entry, it must be
1261 * after the end of the potential new region.
1262 */
1263
1264 if (next->vme_start >= end)
1265 break;
1266
1267 /*
1268 * Didn't fit -- move to the next entry.
1269 */
1270
1271 entry = next;
1272 start = entry->vme_end;
1273 }
1274 *address = start;
1275 } else {
1276 vm_map_entry_t temp_entry;
1277
1278 /*
1279 * Verify that:
1280 * the address doesn't itself violate
1281 * the mask requirement.
1282 */
1283
1284 if ((start & mask) != 0)
1285 return(KERN_NO_SPACE);
1286
1287 vm_map_lock(map);
1288
1289 /*
1290 * ... the address is within bounds
1291 */
1292
1293 end = start + size;
1294
1295 if ((start < map->min_offset) ||
1296 (end > map->max_offset) ||
1297 (start >= end)) {
1298 RETURN(KERN_INVALID_ADDRESS);
1299 }
1300
1301 /*
1302 * ... the starting address isn't allocated
1303 */
1304
1305 if (vm_map_lookup_entry(map, start, &temp_entry))
1306 RETURN(KERN_NO_SPACE);
1307
1308 entry = temp_entry;
1309
1310 /*
1311 * ... the next region doesn't overlap the
1312 * end point.
1313 */
1314
1315 if ((entry->vme_next != vm_map_to_entry(map)) &&
1316 (entry->vme_next->vme_start < end))
1317 RETURN(KERN_NO_SPACE);
1318 }
1319
1320 /*
1321 * At this point,
1322 * "start" and "end" should define the endpoints of the
1323 * available new range, and
1324 * "entry" should refer to the region before the new
1325 * range, and
1326 *
1327 * the map should be locked.
1328 */
1329
1330 /*
1331 * See whether we can avoid creating a new entry (and object) by
1332 * extending one of our neighbors. [So far, we only attempt to
1333 * extend from below.]
1334 */
1335
1336 if ((object == VM_OBJECT_NULL) &&
1337 (entry != vm_map_to_entry(map)) &&
1338 (entry->vme_end == start) &&
1339 (!entry->is_shared) &&
1340 (!entry->is_sub_map) &&
1341 (entry->inheritance == inheritance) &&
1342 (entry->protection == cur_protection) &&
1343 (entry->max_protection == max_protection) &&
1344 (entry->wired_count == 0) && /* implies user_wired_count == 0 */
1345 (entry->projected_on == 0)) {
1346 if (vm_object_coalesce(entry->object.vm_object,
1347 VM_OBJECT_NULL,
1348 entry->offset,
1349 (vm_offset_t) 0,
1350 (vm_size_t)(entry->vme_end - entry->vme_start),
1351 (vm_size_t)(end - entry->vme_end))) {
1352
1353 /*
1354 * Coalesced the two objects - can extend
1355 * the previous map entry to include the
1356 * new range.
1357 */
1358 map->size += (end - entry->vme_end);
1359 entry->vme_end = end;
1360 RETURN(KERN_SUCCESS);
1361 }
1362 }
1363
1364 /*
1365 * Create a new entry
1366 */
1367
1368 /**/ {
1369 register vm_map_entry_t new_entry;
1370
1371 new_entry = vm_map_entry_create(map);
1372
1373 new_entry->vme_start = start;
1374 new_entry->vme_end = end;
1375
1376 new_entry->is_shared = FALSE;
1377 new_entry->is_sub_map = FALSE;
1378 new_entry->object.vm_object = object;
1379 new_entry->offset = offset;
1380
1381 new_entry->needs_copy = needs_copy;
1382
1383 new_entry->inheritance = inheritance;
1384 new_entry->protection = cur_protection;
1385 new_entry->max_protection = max_protection;
1386 new_entry->wired_count = 0;
1387 new_entry->user_wired_count = 0;
1388
1389 new_entry->in_transition = FALSE;
1390 new_entry->needs_wakeup = FALSE;
1391 new_entry->projected_on = 0;
1392
1393 /*
1394 * Insert the new entry into the list
1395 */
1396
1397 vm_map_entry_link(map, entry, new_entry);
1398 map->size += size;
1399
1400 /*
1401 * Update the free space hint and the lookup hint
1402 */
1403
1404 if ((map->first_free == entry) &&
1405 (entry->vme_end >= new_entry->vme_start))
1406 map->first_free = new_entry;
1407
1408 SAVE_HINT(map, new_entry);
1409
1410 vm_map_unlock(map);
1411
1412 if ((object != VM_OBJECT_NULL) &&
1413 (vm_map_pmap_enter_enable) &&
1414 (!anywhere) &&
1415 (!needs_copy) &&
1416 (size < (128*1024))) {
1417 vm_map_pmap_enter(map, start, end,
1418 object, offset, cur_protection);
1419 }
1420
1421 return(result);
1422 /**/ }
1423
1424 BailOut: ;
1425
1426 vm_map_unlock(map);
1427 return(result);
1428
1429 #undef RETURN
1430 }
1431
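/*
 *	A hedged usage sketch for vm_map_enter(): map "size" bytes of an
 *	object anywhere in a map with default protections.  The wrapper
 *	name is illustrative; real callers (e.g. the vm_map trap path)
 *	perform additional argument checking first.
 */
kern_return_t example_map_object(map, object, offset, size, addrp)
	vm_map_t	map;
	vm_object_t	object;
	vm_offset_t	offset;
	vm_size_t	size;
	vm_offset_t	*addrp;		/* IN/OUT: hint in, chosen address out */
{
	return vm_map_enter(map, addrp, size,
			    (vm_offset_t) 0,		/* no alignment mask */
			    TRUE,			/* anywhere */
			    object, offset,
			    FALSE,			/* needs_copy */
			    VM_PROT_DEFAULT, VM_PROT_ALL,
			    VM_INHERIT_DEFAULT);
}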
1432 /*
1433 * vm_map_clip_start: [ internal use only ]
1434 *
1435 * Asserts that the given entry begins at or after
1436 * the specified address; if necessary,
1437 * it splits the entry into two.
1438 */
1439 void _vm_map_clip_start();
1440 #define vm_map_clip_start(map, entry, startaddr) \
1441 MACRO_BEGIN \
1442 if ((startaddr) > (entry)->vme_start) \
1443 _vm_map_clip_start(&(map)->hdr,(entry),(startaddr)); \
1444 MACRO_END
1445
1446 void _vm_map_copy_clip_start();
1447 #define vm_map_copy_clip_start(copy, entry, startaddr) \
1448 MACRO_BEGIN \
1449 if ((startaddr) > (entry)->vme_start) \
1450 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
1451 MACRO_END
1452
1453 /*
1454 * This routine is called only when it is known that
1455 * the entry must be split.
1456 */
1457 void _vm_map_clip_start(map_header, entry, start)
1458 register struct vm_map_header *map_header;
1459 register vm_map_entry_t entry;
1460 register vm_offset_t start;
1461 {
1462 register vm_map_entry_t new_entry;
1463
1464 /*
1465 * Split off the front portion --
1466 * note that we must insert the new
1467 * entry BEFORE this one, so that
1468 * this entry has the specified starting
1469 * address.
1470 */
1471
1472 new_entry = _vm_map_entry_create(map_header);
1473 vm_map_entry_copy_full(new_entry, entry);
1474
1475 new_entry->vme_end = start;
1476 entry->offset += (start - entry->vme_start);
1477 entry->vme_start = start;
1478
1479 _vm_map_entry_link(map_header, entry->vme_prev, new_entry);
1480
1481 if (entry->is_sub_map)
1482 vm_map_reference(new_entry->object.sub_map);
1483 else
1484 vm_object_reference(new_entry->object.vm_object);
1485 }
1486
1487 /*
1488 * vm_map_clip_end: [ internal use only ]
1489 *
1490 * Asserts that the given entry ends at or before
1491 * the specified address; if necessary,
1492 * it splits the entry into two.
1493 */
1494 void _vm_map_clip_end();
1495 #define vm_map_clip_end(map, entry, endaddr) \
1496 MACRO_BEGIN \
1497 if ((endaddr) < (entry)->vme_end) \
1498 _vm_map_clip_end(&(map)->hdr,(entry),(endaddr)); \
1499 MACRO_END
1500
1501 void _vm_map_copy_clip_end();
1502 #define vm_map_copy_clip_end(copy, entry, endaddr) \
1503 MACRO_BEGIN \
1504 if ((endaddr) < (entry)->vme_end) \
1505 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
1506 MACRO_END
1507
1508 /*
1509 * This routine is called only when it is known that
1510 * the entry must be split.
1511 */
1512 void _vm_map_clip_end(map_header, entry, end)
1513 register struct vm_map_header *map_header;
1514 register vm_map_entry_t entry;
1515 register vm_offset_t end;
1516 {
1517 register vm_map_entry_t new_entry;
1518
1519 /*
1520 * Create a new entry and insert it
1521 * AFTER the specified entry
1522 */
1523
1524 new_entry = _vm_map_entry_create(map_header);
1525 vm_map_entry_copy_full(new_entry, entry);
1526
1527 new_entry->vme_start = entry->vme_end = end;
1528 new_entry->offset += (end - entry->vme_start);
1529
1530 _vm_map_entry_link(map_header, entry, new_entry);
1531
1532 if (entry->is_sub_map)
1533 vm_map_reference(new_entry->object.sub_map);
1534 else
1535 vm_object_reference(new_entry->object.vm_object);
1536 }
1537
1538 /*
1539 * VM_MAP_RANGE_CHECK: [ internal use only ]
1540 *
1541 * Asserts that the starting and ending region
1542 * addresses fall within the valid range of the map.
1543 */
1544 #define VM_MAP_RANGE_CHECK(map, start, end) \
1545 { \
1546 if (start < vm_map_min(map)) \
1547 start = vm_map_min(map); \
1548 if (end > vm_map_max(map)) \
1549 end = vm_map_max(map); \
1550 if (start > end) \
1551 start = end; \
1552 }
1553
1554 /*
1555 * vm_map_submap: [ kernel use only ]
1556 *
1557 * Mark the given range as handled by a subordinate map.
1558 *
1559 * This range must have been created with vm_map_find using
1560 * the vm_submap_object, and no other operations may have been
1561 * performed on this range prior to calling vm_map_submap.
1562 *
1563 * Only a limited number of operations can be performed
1564  *	within this range after calling vm_map_submap:
1565 * vm_fault
1566 * [Don't try vm_map_copyin!]
1567 *
1568 * To remove a submapping, one must first remove the
1569 * range from the superior map, and then destroy the
1570 * submap (if desired). [Better yet, don't try it.]
1571 */
1572 kern_return_t vm_map_submap(map, start, end, submap)
1573 register vm_map_t map;
1574 register vm_offset_t start;
1575 register vm_offset_t end;
1576 vm_map_t submap;
1577 {
1578 vm_map_entry_t entry;
1579 register kern_return_t result = KERN_INVALID_ARGUMENT;
1580 register vm_object_t object;
1581
1582 vm_map_lock(map);
1583
1584 VM_MAP_RANGE_CHECK(map, start, end);
1585
1586 if (vm_map_lookup_entry(map, start, &entry)) {
1587 vm_map_clip_start(map, entry, start);
1588 }
1589 else
1590 entry = entry->vme_next;
1591
1592 vm_map_clip_end(map, entry, end);
1593
1594 if ((entry->vme_start == start) && (entry->vme_end == end) &&
1595 (!entry->is_sub_map) &&
1596 ((object = entry->object.vm_object) == vm_submap_object) &&
1597 (object->resident_page_count == 0) &&
1598 (object->copy == VM_OBJECT_NULL) &&
1599 (object->shadow == VM_OBJECT_NULL) &&
1600 (!object->pager_created)) {
1601 entry->object.vm_object = VM_OBJECT_NULL;
1602 vm_object_deallocate(object);
1603 entry->is_sub_map = TRUE;
1604 vm_map_reference(entry->object.sub_map = submap);
1605 result = KERN_SUCCESS;
1606 }
1607 vm_map_unlock(map);
1608
1609 return(result);
1610 }
1611
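/*
 *	A hedged sketch of the submap setup sequence the comment above
 *	prescribes: reserve the range with the placeholder vm_submap_object
 *	(here via vm_map_enter, which consumes the object reference taken
 *	below), then hand the range over to the subordinate map with
 *	vm_map_submap().  The function name is illustrative and error
 *	handling is abbreviated.
 */
kern_return_t example_install_submap(parent, start, size, submap)
	vm_map_t	parent;
	vm_offset_t	start;
	vm_size_t	size;
	vm_map_t	submap;
{
	vm_offset_t	addr = start;
	kern_return_t	kr;

	vm_object_reference(vm_submap_object);
	kr = vm_map_enter(parent, &addr, size, (vm_offset_t) 0,
			  FALSE,			/* at the given address */
			  vm_submap_object, (vm_offset_t) 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kr != KERN_SUCCESS)
		return kr;
	return vm_map_submap(parent, addr, addr + size, submap);
}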
1612 /*
1613 * vm_map_protect:
1614 *
1615 * Sets the protection of the specified address
1616 * region in the target map. If "set_max" is
1617 * specified, the maximum protection is to be set;
1618 * otherwise, only the current protection is affected.
1619 */
1620 kern_return_t vm_map_protect(map, start, end, new_prot, set_max)
1621 register vm_map_t map;
1622 register vm_offset_t start;
1623 register vm_offset_t end;
1624 register vm_prot_t new_prot;
1625 register boolean_t set_max;
1626 {
1627 register vm_map_entry_t current;
1628 vm_map_entry_t entry;
1629
1630 vm_map_lock(map);
1631
1632 VM_MAP_RANGE_CHECK(map, start, end);
1633
1634 if (vm_map_lookup_entry(map, start, &entry)) {
1635 vm_map_clip_start(map, entry, start);
1636 }
1637 else
1638 entry = entry->vme_next;
1639
1640 /*
1641 * Make a first pass to check for protection
1642 * violations.
1643 */
1644
1645 current = entry;
1646 while ((current != vm_map_to_entry(map)) &&
1647 (current->vme_start < end)) {
1648
1649 if (current->is_sub_map) {
1650 vm_map_unlock(map);
1651 return(KERN_INVALID_ARGUMENT);
1652 }
1653 if ((new_prot & current->max_protection) != new_prot) {
1654 vm_map_unlock(map);
1655 return(KERN_PROTECTION_FAILURE);
1656 }
1657
1658 current = current->vme_next;
1659 }
1660
1661 /*
1662 * Go back and fix up protections.
1663 * [Note that clipping is not necessary the second time.]
1664 */
1665
1666 current = entry;
1667
1668 while ((current != vm_map_to_entry(map)) &&
1669 (current->vme_start < end)) {
1670
1671 vm_prot_t old_prot;
1672
1673 vm_map_clip_end(map, current, end);
1674
1675 old_prot = current->protection;
1676 if (set_max)
1677 current->protection =
1678 (current->max_protection = new_prot) &
1679 old_prot;
1680 else
1681 current->protection = new_prot;
1682
1683 /*
1684 * Update physical map if necessary.
1685 */
1686
1687 if (current->protection != old_prot) {
1688 pmap_protect(map->pmap, current->vme_start,
1689 current->vme_end,
1690 current->protection);
1691 }
1692 current = current->vme_next;
1693 }
1694
1695 vm_map_unlock(map);
1696 return(KERN_SUCCESS);
1697 }
1698
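/*
 *	A small usage sketch for vm_map_protect(): make a range read-only
 *	for current accesses without lowering its maximum protection, so
 *	it can later be made writable again.  The wrapper name is
 *	illustrative; VM_PROT_READ comes from mach/vm_prot.h.
 */
kern_return_t example_make_readonly(map, start, end)
	vm_map_t	map;
	vm_offset_t	start, end;
{
	/* set_max == FALSE: only the current protection changes */
	return vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
}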
1699 /*
1700 * vm_map_inherit:
1701 *
1702 * Sets the inheritance of the specified address
1703 * range in the target map. Inheritance
1704 * affects how the map will be shared with
1705 * child maps at the time of vm_map_fork.
1706 */
1707 kern_return_t vm_map_inherit(map, start, end, new_inheritance)
1708 register vm_map_t map;
1709 register vm_offset_t start;
1710 register vm_offset_t end;
1711 register vm_inherit_t new_inheritance;
1712 {
1713 register vm_map_entry_t entry;
1714 vm_map_entry_t temp_entry;
1715
1716 vm_map_lock(map);
1717
1718 VM_MAP_RANGE_CHECK(map, start, end);
1719
1720 if (vm_map_lookup_entry(map, start, &temp_entry)) {
1721 entry = temp_entry;
1722 vm_map_clip_start(map, entry, start);
1723 }
1724 else
1725 entry = temp_entry->vme_next;
1726
1727 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
1728 vm_map_clip_end(map, entry, end);
1729
1730 entry->inheritance = new_inheritance;
1731
1732 entry = entry->vme_next;
1733 }
1734
1735 vm_map_unlock(map);
1736 return(KERN_SUCCESS);
1737 }
1738
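/*
 *	A small usage sketch for vm_map_inherit(): arrange for a range to be
 *	shared, rather than copied, with children created by vm_map_fork().
 *	VM_INHERIT_SHARE comes from mach/vm_inherit.h; per the revision
 *	history above, callers are expected to have validated the
 *	inheritance value already.  The wrapper name is illustrative.
 */
kern_return_t example_share_with_children(map, start, end)
	vm_map_t	map;
	vm_offset_t	start, end;
{
	return vm_map_inherit(map, start, end, VM_INHERIT_SHARE);
}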
1739 /*
1740 * vm_map_pageable_common:
1741 *
1742 * Sets the pageability of the specified address
1743 * range in the target map. Regions specified
1744 * as not pageable require locked-down physical
1745 * memory and physical page maps. access_type indicates
1746 * types of accesses that must not generate page faults.
1747 * This is checked against protection of memory being locked-down.
1748 * access_type of VM_PROT_NONE makes memory pageable.
1749 *
1750 * The map must not be locked, but a reference
1751 * must remain to the map throughout the call.
1752 *
1753 * Callers should use macros in vm/vm_map.h (i.e. vm_map_pageable,
1754 * or vm_map_pageable_user); don't call vm_map_pageable directly.
1755 */
1756 kern_return_t vm_map_pageable_common(map, start, end, access_type, user_wire)
1757 register vm_map_t map;
1758 register vm_offset_t start;
1759 register vm_offset_t end;
1760 register vm_prot_t access_type;
1761 boolean_t user_wire;
1762 {
1763 register vm_map_entry_t entry;
1764 vm_map_entry_t start_entry;
1765
1766 vm_map_lock(map);
1767
1768 VM_MAP_RANGE_CHECK(map, start, end);
1769
1770 if (vm_map_lookup_entry(map, start, &start_entry)) {
1771 entry = start_entry;
1772 /*
1773 * vm_map_clip_start will be done later.
1774 */
1775 }
1776 else {
1777 /*
1778 * Start address is not in map; this is fatal.
1779 */
1780 vm_map_unlock(map);
1781 return(KERN_FAILURE);
1782 }
1783
1784 /*
1785 * Actions are rather different for wiring and unwiring,
1786 * so we have two separate cases.
1787 */
1788
1789 if (access_type == VM_PROT_NONE) {
1790
1791 vm_map_clip_start(map, entry, start);
1792
1793 /*
1794 * Unwiring. First ensure that the range to be
1795 * unwired is really wired down.
1796 */
1797 while ((entry != vm_map_to_entry(map)) &&
1798 (entry->vme_start < end)) {
1799
1800 if ((entry->wired_count == 0) ||
1801 ((entry->vme_end < end) &&
1802 ((entry->vme_next == vm_map_to_entry(map)) ||
1803 (entry->vme_next->vme_start > entry->vme_end))) ||
1804 (user_wire && (entry->user_wired_count == 0))) {
1805 vm_map_unlock(map);
1806 return(KERN_INVALID_ARGUMENT);
1807 }
1808 entry = entry->vme_next;
1809 }
1810
1811 /*
1812 * Now decrement the wiring count for each region.
1813 * If a region becomes completely unwired,
1814 * unwire its physical pages and mappings.
1815 */
1816 entry = start_entry;
1817 while ((entry != vm_map_to_entry(map)) &&
1818 (entry->vme_start < end)) {
1819 vm_map_clip_end(map, entry, end);
1820
1821 if (user_wire) {
1822 if (--(entry->user_wired_count) == 0)
1823 entry->wired_count--;
1824 }
1825 else {
1826 entry->wired_count--;
1827 }
1828
1829 if (entry->wired_count == 0)
1830 vm_fault_unwire(map, entry);
1831
1832 entry = entry->vme_next;
1833 }
1834 }
1835
1836 else {
1837 /*
1838 * Wiring. We must do this in two passes:
1839 *
1840 * 1. Holding the write lock, we create any shadow
1841 * or zero-fill objects that need to be created.
1842 * Then we clip each map entry to the region to be
1843 * wired and increment its wiring count. We
1844 * create objects before clipping the map entries
1845 * to avoid object proliferation.
1846 *
1847 * 2. We downgrade to a read lock, and call
1848 * vm_fault_wire to fault in the pages for any
1849 * newly wired area (wired_count is 1).
1850 *
1851 * Downgrading to a read lock for vm_fault_wire avoids
1852 * a possible deadlock with another thread that may have
1853 * faulted on one of the pages to be wired (it would mark
1854 * the page busy, blocking us, then in turn block on the
1855 * map lock that we hold). Because of problems in the
1856 * recursive lock package, we cannot upgrade to a write
1857 * lock in vm_map_lookup. Thus, any actions that require
1858 * the write lock must be done beforehand. Because we
1859 * keep the read lock on the map, the copy-on-write
1860 * status of the entries we modify here cannot change.
1861 */
1862
1863 /*
1864 * Pass 1.
1865 */
1866 while ((entry != vm_map_to_entry(map)) &&
1867 (entry->vme_start < end)) {
1868 vm_map_clip_end(map, entry, end);
1869
1870 if (entry->wired_count == 0) {
1871
1872 /*
1873 * Perform actions of vm_map_lookup that need
1874 * the write lock on the map: create a shadow
1875 * object for a copy-on-write region, or an
1876 * object for a zero-fill region.
1877 */
1878 if (entry->needs_copy &&
1879 ((entry->protection & VM_PROT_WRITE) != 0)) {
1880
1881 vm_object_shadow(&entry->object.vm_object,
1882 &entry->offset,
1883 (vm_size_t)(entry->vme_end
1884 - entry->vme_start));
1885 entry->needs_copy = FALSE;
1886 }
1887 if (entry->object.vm_object == VM_OBJECT_NULL) {
1888 entry->object.vm_object =
1889 vm_object_allocate(
1890 (vm_size_t)(entry->vme_end
1891 - entry->vme_start));
1892 entry->offset = (vm_offset_t)0;
1893 }
1894 }
1895 vm_map_clip_start(map, entry, start);
1896 vm_map_clip_end(map, entry, end);
1897
1898 if (user_wire) {
1899 if ((entry->user_wired_count)++ == 0)
1900 entry->wired_count++;
1901 }
1902 else {
1903 entry->wired_count++;
1904 }
1905
1906 /*
1907 * Check for holes and protection mismatch.
1908 * Holes: Next entry should be contiguous unless
1909 * this is the end of the region.
1910 * Protection: Access requested must be allowed.
1911 */
1912 if (((entry->vme_end < end) &&
1913 ((entry->vme_next == vm_map_to_entry(map)) ||
1914 (entry->vme_next->vme_start > entry->vme_end))) ||
1915 ((entry->protection & access_type) != access_type)) {
1916 /*
1917 * Found a hole or protection problem.
1918 * Object creation actions
1919 * do not need to be undone, but the
1920 * wired counts need to be restored.
1921 */
1922 while ((entry != vm_map_to_entry(map)) &&
1923 (entry->vme_end > start)) {
1924 if (user_wire) {
1925 if (--(entry->user_wired_count) == 0)
1926 entry->wired_count--;
1927 }
1928 else {
1929 entry->wired_count--;
1930 }
1931
1932 entry = entry->vme_prev;
1933 }
1934
1935 vm_map_unlock(map);
1936 return(KERN_FAILURE);
1937 }
1938 entry = entry->vme_next;
1939 }
1940
1941 /*
1942 * Pass 2.
1943 */
1944
1945 /*
1946 * HACK HACK HACK HACK
1947 *
1948 * If we are wiring in the kernel map or a submap of it,
1949 * unlock the map to avoid deadlocks. We trust that the
1950 * kernel threads are well-behaved, and therefore will
1951 * not do anything destructive to this region of the map
1952 * while we have it unlocked. We cannot trust user threads
1953 * to do the same.
1954 *
1955 * HACK HACK HACK HACK
1956 */
1957 if (vm_map_pmap(map) == kernel_pmap) {
1958 vm_map_unlock(map); /* trust me ... */
1959 }
1960 else {
1961 vm_map_lock_set_recursive(map);
1962 vm_map_lock_write_to_read(map);
1963 }
1964
1965 entry = start_entry;
1966 while (entry != vm_map_to_entry(map) &&
1967 entry->vme_start < end) {
1968 /*
1969 * Wiring cases:
1970 * Kernel: wired == 1 && user_wired == 0
1971 * User: wired == 1 && user_wired == 1
1972 *
1973	 * Don't need to wire if either is > 1. wired == 0 &&
1974	 * user_wired == 1 can't happen.
1975 */
1976
1977 /*
1978 * XXX This assumes that the faults always succeed.
1979 */
1980 if ((entry->wired_count == 1) &&
1981 (entry->user_wired_count <= 1)) {
1982 vm_fault_wire(map, entry);
1983 }
1984 entry = entry->vme_next;
1985 }
1986
1987 if (vm_map_pmap(map) == kernel_pmap) {
1988 vm_map_lock(map);
1989 }
1990 else {
1991 vm_map_lock_clear_recursive(map);
1992 }
1993 }
1994
1995 vm_map_unlock(map);
1996
1997 return(KERN_SUCCESS);
1998 }
1999
2000 /*
2001 * vm_map_entry_delete: [ internal use only ]
2002 *
2003 * Deallocate the given entry from the target map.
2004 */
2005 void vm_map_entry_delete(map, entry)
2006 register vm_map_t map;
2007 register vm_map_entry_t entry;
2008 {
2009 register vm_offset_t s, e;
2010 register vm_object_t object;
2011 extern vm_object_t kernel_object;
2012
2013 s = entry->vme_start;
2014 e = entry->vme_end;
2015
2016 /*Check if projected buffer*/
2017 if (map != kernel_map && entry->projected_on != 0) {
2018 /*Check if projected kernel entry is persistent;
2019 may only manipulate directly if it is*/
2020 if (entry->projected_on->projected_on == 0)
2021 entry->wired_count = 0; /*Avoid unwire fault*/
2022 else
2023 return;
2024 }
2025
2026 /*
2027 * Get the object. Null objects cannot have pmap entries.
2028 */
2029
2030 if ((object = entry->object.vm_object) != VM_OBJECT_NULL) {
2031
2032 /*
2033 * Unwire before removing addresses from the pmap;
2034 * otherwise, unwiring will put the entries back in
2035 * the pmap.
2036 */
2037
2038 if (entry->wired_count != 0) {
2039 vm_fault_unwire(map, entry);
2040 entry->wired_count = 0;
2041 entry->user_wired_count = 0;
2042 }
2043
2044 /*
2045 * If the object is shared, we must remove
2046 * *all* references to this data, since we can't
2047 * find all of the physical maps which are sharing
2048 * it.
2049 */
2050
2051 if (object == kernel_object) {
2052 vm_object_lock(object);
2053 vm_object_page_remove(object, entry->offset,
2054 entry->offset + (e - s));
2055 vm_object_unlock(object);
2056 } else if (entry->is_shared) {
2057 vm_object_pmap_remove(object,
2058 entry->offset,
2059 entry->offset + (e - s));
2060 }
2061 else {
2062 pmap_remove(map->pmap, s, e);
2063 }
2064 }
2065
2066 /*
2067 * Deallocate the object only after removing all
2068 * pmap entries pointing to its pages.
2069 */
2070
2071 if (entry->is_sub_map)
2072 vm_map_deallocate(entry->object.sub_map);
2073 else
2074 vm_object_deallocate(entry->object.vm_object);
2075
2076 vm_map_entry_unlink(map, entry);
2077 map->size -= e - s;
2078
2079 vm_map_entry_dispose(map, entry);
2080 }
2081
2082 /*
2083 * vm_map_delete: [ internal use only ]
2084 *
2085 * Deallocates the given address range from the target
2086 * map.
2087 */
2088
2089 kern_return_t vm_map_delete(map, start, end)
2090 register vm_map_t map;
2091 register vm_offset_t start;
2092 register vm_offset_t end;
2093 {
2094 vm_map_entry_t entry;
2095 vm_map_entry_t first_entry;
2096
2097 /*
2098 * Find the start of the region, and clip it
2099 */
2100
2101 if (!vm_map_lookup_entry(map, start, &first_entry))
2102 entry = first_entry->vme_next;
2103 else {
2104 entry = first_entry;
2105 #if NORMA_IPC_xxx
2106 /*
2107 * XXX Had to disable this code because:
2108
2109 _vm_map_delete(c0804b78,c2198000,c219a000,0,c219a000)+df
2110 [vm/vm_map.c:2007]
2111 _vm_map_remove(c0804b78,c2198000,c219a000,c0817834,
2112 c081786c)+42 [vm/vm_map.c:2094]
2113 _kmem_io_map_deallocate(c0804b78,c2198000,2000,c0817834,
2114 c081786c)+43 [vm/vm_kern.c:818]
2115 _device_write_dealloc(c081786c)+117 [device/ds_routines.c:814]
2116 _ds_write_done(c081786c,0)+2e [device/ds_routines.c:848]
2117 _io_done_thread_continue(c08150c0,c21d4e14,c21d4e30,c08150c0,
2118 c080c114)+14 [device/ds_routines.c:1350]
2119
2120 */
2121 if (start > entry->vme_start
2122 && end == entry->vme_end
2123 && ! entry->wired_count /* XXX ??? */
2124 && ! entry->is_shared
2125 && ! entry->projected_on
2126 && ! entry->is_sub_map) {
2127 extern vm_object_t kernel_object;
2128 register vm_object_t object = entry->object.vm_object;
2129
2130 /*
2131 * The region to be deleted lives at the end
2132 * of this entry, and thus all we have to do is
2133 * truncate the entry.
2134 *
2135 * This special case is necessary if we want
2136 * coalescing to do us any good.
2137 *
2138 * XXX Do we have to adjust object size?
2139 */
2140 if (object == kernel_object) {
2141 vm_object_lock(object);
2142 vm_object_page_remove(object,
2143 entry->offset + start,
2144 entry->offset +
2145 (end - start));
2146 vm_object_unlock(object);
2147 } else if (entry->is_shared) {
2148 vm_object_pmap_remove(object,
2149 entry->offset + start,
2150 entry->offset +
2151 (end - start));
2152 } else {
2153 pmap_remove(map->pmap, start, end);
2154 }
2155 object->size -= (end - start); /* XXX */
2156
2157 entry->vme_end = start;
2158 map->size -= (end - start);
2159
2160 if (map->wait_for_space) {
2161 thread_wakeup((event_t) map);
2162 }
2163 return KERN_SUCCESS;
2164 }
2165	#endif	/* NORMA_IPC_xxx */
2166 vm_map_clip_start(map, entry, start);
2167
2168 /*
2169 * Fix the lookup hint now, rather than each
2170	 * time through the loop.
2171 */
2172
2173 SAVE_HINT(map, entry->vme_prev);
2174 }
2175
2176 /*
2177 * Save the free space hint
2178 */
2179
2180 if (map->first_free->vme_start >= start)
2181 map->first_free = entry->vme_prev;
2182
2183 /*
2184 * Step through all entries in this region
2185 */
2186
2187 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
2188 vm_map_entry_t next;
2189
2190 vm_map_clip_end(map, entry, end);
2191
2192 /*
2193 * If the entry is in transition, we must wait
2194 * for it to exit that state. It could be clipped
2195 * while we leave the map unlocked.
2196 */
2197 if(entry->in_transition) {
2198 /*
2199 * Say that we are waiting, and wait for entry.
2200 */
2201 entry->needs_wakeup = TRUE;
2202 vm_map_entry_wait(map, FALSE);
2203 vm_map_lock(map);
2204
2205 /*
2206 * The entry could have been clipped or it
2207	 * may not exist anymore. Look it up again.
2208 */
2209 if(!vm_map_lookup_entry(map, start, &entry)) {
2210 entry = entry->vme_next;
2211 }
2212 continue;
2213 }
2214
2215 next = entry->vme_next;
2216
2217 vm_map_entry_delete(map, entry);
2218 entry = next;
2219 }
2220
2221 if (map->wait_for_space)
2222 thread_wakeup((event_t) map);
2223
2224 return(KERN_SUCCESS);
2225 }
2226
2227 /*
2228 * vm_map_remove:
2229 *
2230 * Remove the given address range from the target map.
2231 * This is the exported form of vm_map_delete.
2232 */
2233 kern_return_t vm_map_remove(map, start, end)
2234 register vm_map_t map;
2235 register vm_offset_t start;
2236 register vm_offset_t end;
2237 {
2238 register kern_return_t result;
2239
2240 vm_map_lock(map);
2241 VM_MAP_RANGE_CHECK(map, start, end);
2242 result = vm_map_delete(map, start, end);
2243 vm_map_unlock(map);
2244
2245 return(result);
2246 }
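
/*
 *	Illustrative sketch (hypothetical caller, not from this file;
 *	"target_map", "addr" and "size" are placeholders): a typical
 *	user of vm_map_remove deallocates a page-rounded range and
 *	checks the result.
 *
 *		kern_return_t	kr;
 *
 *		kr = vm_map_remove(target_map, trunc_page(addr),
 *				   round_page(addr + size));
 *		if (kr != KERN_SUCCESS)
 *			return(kr);
 */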
2247
2248
2249 /*
2250 * vm_map_copy_steal_pages:
2251 *
2252 * Steal all the pages from a vm_map_copy page_list by copying ones
2253 * that have not already been stolen.
2254 */
2255 void
2256 vm_map_copy_steal_pages(copy)
2257 vm_map_copy_t copy;
2258 {
2259 register vm_page_t m, new_m;
2260 register int i;
2261 vm_object_t object;
2262
2263 for (i = 0; i < copy->cpy_npages; i++) {
2264
2265 /*
2266 * If the page is not tabled, then it's already stolen.
2267 */
2268 m = copy->cpy_page_list[i];
2269 if (!m->tabled)
2270 continue;
2271
2272 /*
2273 * Page was not stolen, get a new
2274 * one and do the copy now.
2275 */
2276 while ((new_m = vm_page_grab()) == VM_PAGE_NULL) {
2277 VM_PAGE_WAIT((void(*)()) 0);
2278 }
2279
2280 vm_page_copy(m, new_m);
2281
2282 object = m->object;
2283 vm_object_lock(object);
2284 vm_page_lock_queues();
2285 if (!m->active && !m->inactive)
2286 vm_page_activate(m);
2287 vm_page_unlock_queues();
2288 PAGE_WAKEUP_DONE(m);
2289 vm_object_paging_end(object);
2290 vm_object_unlock(object);
2291
2292 copy->cpy_page_list[i] = new_m;
2293 }
2294 }
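
/*
 *	Illustrative sketch: vm_map_copyout_page_list (below) relies on
 *	the fact that all pages in a page list are either tabled or
 *	stolen together, so it tests only the first page before calling
 *	this routine:
 *
 *		page_list = &copy->cpy_page_list[0];
 *		if ((*page_list)->tabled)
 *			vm_map_copy_steal_pages(copy);
 */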
2295
2296 /*
2297 * vm_map_copy_page_discard:
2298 *
2299 * Get rid of the pages in a page_list copy. If the pages are
2300 * stolen, they are freed. If the pages are not stolen, they
2301 * are unbusied, and associated state is cleaned up.
2302 */
2303 void vm_map_copy_page_discard(copy)
2304 vm_map_copy_t copy;
2305 {
2306 while (copy->cpy_npages > 0) {
2307 vm_page_t m;
2308
2309 if((m = copy->cpy_page_list[--(copy->cpy_npages)]) !=
2310 VM_PAGE_NULL) {
2311
2312 /*
2313 * If it's not in the table, then it's
2314 * a stolen page that goes back
2315 * to the free list. Else it belongs
2316 * to some object, and we hold a
2317 * paging reference on that object.
2318 */
2319 if (!m->tabled) {
2320 VM_PAGE_FREE(m);
2321 }
2322 else {
2323 vm_object_t object;
2324
2325 object = m->object;
2326
2327 vm_object_lock(object);
2328 vm_page_lock_queues();
2329 if (!m->active && !m->inactive)
2330 vm_page_activate(m);
2331 vm_page_unlock_queues();
2332
2333 PAGE_WAKEUP_DONE(m);
2334 vm_object_paging_end(object);
2335 vm_object_unlock(object);
2336 }
2337 }
2338 }
2339 }
2340
2341 /*
2342 * Routine: vm_map_copy_discard
2343 *
2344 * Description:
2345 * Dispose of a map copy object (returned by
2346 * vm_map_copyin).
2347 */
2348 void
2349 vm_map_copy_discard(copy)
2350 vm_map_copy_t copy;
2351 {
2352 free_next_copy:
2353 if (copy == VM_MAP_COPY_NULL)
2354 return;
2355
2356 switch (copy->type) {
2357 case VM_MAP_COPY_ENTRY_LIST:
2358 while (vm_map_copy_first_entry(copy) !=
2359 vm_map_copy_to_entry(copy)) {
2360 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
2361
2362 vm_map_copy_entry_unlink(copy, entry);
2363 vm_object_deallocate(entry->object.vm_object);
2364 vm_map_copy_entry_dispose(copy, entry);
2365 }
2366 break;
2367 case VM_MAP_COPY_OBJECT:
2368 vm_object_deallocate(copy->cpy_object);
2369 break;
2370 case VM_MAP_COPY_PAGE_LIST:
2371
2372 /*
2373 * To clean this up, we have to unbusy all the pages
2374 * and release the paging references in their objects.
2375 */
2376 if (copy->cpy_npages > 0)
2377 vm_map_copy_page_discard(copy);
2378
2379 /*
2380 * If there's a continuation, abort it. The
2381 * abort routine releases any storage.
2382 */
2383 if (vm_map_copy_has_cont(copy)) {
2384
2385 /*
2386 * Special case: recognize
2387 * vm_map_copy_discard_cont and optimize
2388 * here to avoid tail recursion.
2389 */
2390 if (copy->cpy_cont == vm_map_copy_discard_cont) {
2391 register vm_map_copy_t new_copy;
2392
2393 new_copy = (vm_map_copy_t) copy->cpy_cont_args;
2394 zfree(vm_map_copy_zone, (vm_offset_t) copy);
2395 copy = new_copy;
2396 goto free_next_copy;
2397 }
2398 else {
2399 vm_map_copy_abort_cont(copy);
2400 }
2401 }
2402
2403 break;
2404 }
2405 zfree(vm_map_copy_zone, (vm_offset_t) copy);
2406 }
2407
2408 /*
2409 * Routine: vm_map_copy_copy
2410 *
2411 * Description:
2412 * Move the information in a map copy object to
2413 * a new map copy object, leaving the old one
2414 * empty.
2415 *
2416 * This is used by kernel routines that need
2417 * to look at out-of-line data (in copyin form)
2418 * before deciding whether to return SUCCESS.
2419 * If the routine returns FAILURE, the original
2420 * copy object will be deallocated; therefore,
2421 * these routines must make a copy of the copy
2422 * object and leave the original empty so that
2423 * deallocation will not fail.
2424 */
2425 vm_map_copy_t
2426 vm_map_copy_copy(copy)
2427 vm_map_copy_t copy;
2428 {
2429 vm_map_copy_t new_copy;
2430
2431 if (copy == VM_MAP_COPY_NULL)
2432 return VM_MAP_COPY_NULL;
2433
2434 /*
2435 * Allocate a new copy object, and copy the information
2436 * from the old one into it.
2437 */
2438
2439 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
2440 *new_copy = *copy;
2441
2442 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
2443 /*
2444 * The links in the entry chain must be
2445 * changed to point to the new copy object.
2446 */
2447 vm_map_copy_first_entry(copy)->vme_prev
2448 = vm_map_copy_to_entry(new_copy);
2449 vm_map_copy_last_entry(copy)->vme_next
2450 = vm_map_copy_to_entry(new_copy);
2451 }
2452
2453 /*
2454 * Change the old copy object into one that contains
2455 * nothing to be deallocated.
2456 */
2457 copy->type = VM_MAP_COPY_OBJECT;
2458 copy->cpy_object = VM_OBJECT_NULL;
2459
2460 /*
2461 * Return the new object.
2462 */
2463 return new_copy;
2464 }
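
/*
 *	Illustrative sketch (hypothetical kernel routine, not from this
 *	file; "data_looks_bad" is a placeholder check): before examining
 *	out-of-line data that its caller will discard on failure, the
 *	routine moves the data into a private copy object. The original
 *	is left empty, so the caller's later discard is harmless.
 *
 *		vm_map_copy_t	private_copy;
 *
 *		private_copy = vm_map_copy_copy(copy);
 *		if (data_looks_bad(private_copy)) {
 *			vm_map_copy_discard(private_copy);
 *			return(KERN_INVALID_ARGUMENT);
 *		}
 */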
2465
2466 /*
2467 * Routine: vm_map_copy_discard_cont
2468 *
2469 * Description:
2470 * A version of vm_map_copy_discard that can be called
2471 * as a continuation from a vm_map_copy page list.
2472 */
2473 kern_return_t vm_map_copy_discard_cont(cont_args, copy_result)
2474 vm_map_copyin_args_t cont_args;
2475 vm_map_copy_t *copy_result; /* OUT */
2476 {
2477 vm_map_copy_discard((vm_map_copy_t) cont_args);
2478 if (copy_result != (vm_map_copy_t *)0)
2479 *copy_result = VM_MAP_COPY_NULL;
2480 return(KERN_SUCCESS);
2481 }
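
/*
 *	Illustrative sketch (an assumption, not a specific caller in this
 *	file; "chained_copy" is a placeholder): a page-list copy can have
 *	its continuation pointed at this routine, so that discarding the
 *	head copy also discards a chained copy that is no longer needed:
 *
 *		copy->cpy_cont = vm_map_copy_discard_cont;
 *		copy->cpy_cont_args = (char *) chained_copy;
 *
 *	vm_map_copy_discard (above) recognizes this continuation and
 *	iterates rather than recursing on the chained copy.
 */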
2482
2483 /*
2484 * Routine: vm_map_copy_overwrite
2485 *
2486 * Description:
2487 * Copy the memory described by the map copy
2488 * object (copy; returned by vm_map_copyin) onto
2489 * the specified destination region (dst_map, dst_addr).
2490 * The destination must be writeable.
2491 *
2492 * Unlike vm_map_copyout, this routine actually
2493 * writes over previously-mapped memory. If the
2494 * previous mapping was to a permanent (user-supplied)
2495 * memory object, it is preserved.
2496 *
2497 * The attributes (protection and inheritance) of the
2498 * destination region are preserved.
2499 *
2500 * If successful, consumes the copy object.
2501 * Otherwise, the caller is responsible for it.
2502 *
2503 * Implementation notes:
2504 * To overwrite temporary virtual memory, it is
2505 * sufficient to remove the previous mapping and insert
2506 * the new copy. This replacement is done either on
2507 * the whole region (if no permanent virtual memory
2508 * objects are embedded in the destination region) or
2509 * in individual map entries.
2510 *
2511 * To overwrite permanent virtual memory, it is
2512 * necessary to copy each page, as the external
2513 * memory management interface currently does not
2514 * provide any optimizations.
2515 *
2516 * Once a page of permanent memory has been overwritten,
2517 * it is impossible to interrupt this function; otherwise,
2518 * the call would be neither atomic nor location-independent.
2519 * The kernel-state portion of a user thread must be
2520 * interruptible.
2521 *
2522 * It may be expensive to forward all requests that might
2523 * overwrite permanent memory (vm_write, vm_copy) to
2524 * uninterruptible kernel threads. This routine may be
2525 * called by interruptible threads; however, success is
2526 * not guaranteed -- if the request cannot be performed
2527 * atomically and interruptibly, an error indication is
2528 * returned.
2529 */
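/*
 *	Illustrative sketch (hypothetical caller, roughly the path a
 *	vm_write-style operation would take; "src_map", "dst_map" and the
 *	addresses are placeholders): data is captured with vm_map_copyin
 *	and then written over an existing, writeable destination region.
 *	On failure the copy object remains the caller's to discard.
 *
 *		kern_return_t	kr;
 *		vm_map_copy_t	copy;
 *
 *		kr = vm_map_copyin(src_map, src_addr, size, FALSE, &copy);
 *		if (kr != KERN_SUCCESS)
 *			return(kr);
 *		kr = vm_map_copy_overwrite(dst_map, dst_addr, copy, FALSE);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *		return(kr);
 */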
2530 kern_return_t vm_map_copy_overwrite(dst_map, dst_addr, copy, interruptible)
2531 vm_map_t dst_map;
2532 vm_offset_t dst_addr;
2533 vm_map_copy_t copy;
2534 boolean_t interruptible;
2535 {
2536 vm_size_t size;
2537 vm_offset_t start;
2538 vm_map_entry_t tmp_entry;
2539 vm_map_entry_t entry;
2540
2541 boolean_t contains_permanent_objects = FALSE;
2542
2543 interruptible = FALSE; /* XXX */
2544
2545 /*
2546 * Check for null copy object.
2547 */
2548
2549 if (copy == VM_MAP_COPY_NULL)
2550 return(KERN_SUCCESS);
2551
2552 /*
2553 * Only works for entry lists at the moment. Will
2554 * support page lists LATER.
2555 */
2556
2557 #if NORMA_IPC
2558 vm_map_convert_from_page_list(copy);
2559 #else
2560 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
2561 #endif
2562
2563 /*
2564 * Currently this routine only handles page-aligned
2565 * regions. Eventually, it should handle misalignments
2566 * by actually copying pages.
2567 */
2568
2569 if (!page_aligned(copy->offset) ||
2570 !page_aligned(copy->size) ||
2571 !page_aligned(dst_addr))
2572 return(KERN_INVALID_ARGUMENT);
2573
2574 size = copy->size;
2575
2576 if (size == 0) {
2577 vm_map_copy_discard(copy);
2578 return(KERN_SUCCESS);
2579 }
2580
2581 /*
2582 * Verify that the destination is all writeable
2583 * initially.
2584 */
2585 start_pass_1:
2586 vm_map_lock(dst_map);
2587 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
2588 vm_map_unlock(dst_map);
2589 return(KERN_INVALID_ADDRESS);
2590 }
2591 vm_map_clip_start(dst_map, tmp_entry, dst_addr);
2592 for (entry = tmp_entry;;) {
2593 vm_size_t sub_size = (entry->vme_end - entry->vme_start);
2594 vm_map_entry_t next = entry->vme_next;
2595
2596 if ( ! (entry->protection & VM_PROT_WRITE)) {
2597 vm_map_unlock(dst_map);
2598 return(KERN_PROTECTION_FAILURE);
2599 }
2600
2601 /*
2602 * If the entry is in transition, we must wait
2603 * for it to exit that state. Anything could happen
2604 * when we unlock the map, so start over.
2605 */
2606 if (entry->in_transition) {
2607
2608 /*
2609 * Say that we are waiting, and wait for entry.
2610 */
2611 entry->needs_wakeup = TRUE;
2612 vm_map_entry_wait(dst_map, FALSE);
2613
2614 goto start_pass_1;
2615 }
2616
2617 if (size <= sub_size)
2618 break;
2619
2620 if ((next == vm_map_to_entry(dst_map)) ||
2621 (next->vme_start != entry->vme_end)) {
2622 vm_map_unlock(dst_map);
2623 return(KERN_INVALID_ADDRESS);
2624 }
2625
2626
2627 /*
2628 * Check for permanent objects in the destination.
2629 */
2630
2631 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
2632 !entry->object.vm_object->temporary)
2633 contains_permanent_objects = TRUE;
2634
2635 size -= sub_size;
2636 entry = next;
2637 }
2638
2639 /*
2640 * If there are permanent objects in the destination, then
2641 * the copy cannot be interrupted.
2642 */
2643
2644 if (interruptible && contains_permanent_objects)
2645 return(KERN_FAILURE); /* XXX */
2646
2647 /*
2648 * XXXO If there are no permanent objects in the destination,
2649 * XXXO and the source and destination map entry zones match,
2650 * XXXO and the destination map entry is not shared,
2651 * XXXO then the map entries can be deleted and replaced
2652 * XXXO with those from the copy. The following code is the
2653 * XXXO basic idea of what to do, but there are lots of annoying
2654 * XXXO little details about getting protection and inheritance
2655 * XXXO right. Should add protection, inheritance, and sharing checks
2656 * XXXO to the above pass and make sure that no wiring is involved.
2657 */
2658 /*
2659 * if (!contains_permanent_objects &&
2660 * copy->cpy_hdr.entries_pageable == dst_map->hdr.entries_pageable) {
2661 *
2662 * *
2663 * * Run over copy and adjust entries. Steal code
2664 * * from vm_map_copyout() to do this.
2665 * *
2666 *
2667 * tmp_entry = tmp_entry->vme_prev;
2668 * vm_map_delete(dst_map, dst_addr, dst_addr + copy->size);
2669 * vm_map_copy_insert(dst_map, tmp_entry, copy);
2670 *
2671 * vm_map_unlock(dst_map);
2672 * vm_map_copy_discard(copy);
2673 * }
2674 */
2675 /*
2676 *
2677	 * Make a second pass, overwriting the data.
2678 * At the beginning of each loop iteration,
2679 * the next entry to be overwritten is "tmp_entry"
2680 * (initially, the value returned from the lookup above),
2681 * and the starting address expected in that entry
2682 * is "start".
2683 */
2684
2685 start = dst_addr;
2686
2687 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
2688 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
2689 vm_size_t copy_size = (copy_entry->vme_end - copy_entry->vme_start);
2690 vm_object_t object;
2691
2692 entry = tmp_entry;
2693 size = (entry->vme_end - entry->vme_start);
2694 /*
2695 * Make sure that no holes popped up in the
2696 * address map, and that the protection is
2697 * still valid, in case the map was unlocked
2698 * earlier.
2699 */
2700
2701 if (entry->vme_start != start) {
2702 vm_map_unlock(dst_map);
2703 return(KERN_INVALID_ADDRESS);
2704 }
2705 assert(entry != vm_map_to_entry(dst_map));
2706
2707 /*
2708 * Check protection again
2709 */
2710
2711 if ( ! (entry->protection & VM_PROT_WRITE)) {
2712 vm_map_unlock(dst_map);
2713 return(KERN_PROTECTION_FAILURE);
2714 }
2715
2716 /*
2717 * Adjust to source size first
2718 */
2719
2720 if (copy_size < size) {
2721 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
2722 size = copy_size;
2723 }
2724
2725 /*
2726 * Adjust to destination size
2727 */
2728
2729 if (size < copy_size) {
2730 vm_map_copy_clip_end(copy, copy_entry,
2731 copy_entry->vme_start + size);
2732 copy_size = size;
2733 }
2734
2735 assert((entry->vme_end - entry->vme_start) == size);
2736 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
2737 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
2738
2739 /*
2740 * If the destination contains temporary unshared memory,
2741 * we can perform the copy by throwing it away and
2742 * installing the source data.
2743 */
2744
2745 object = entry->object.vm_object;
2746 if (!entry->is_shared &&
2747 ((object == VM_OBJECT_NULL) || object->temporary)) {
2748 vm_object_t old_object = entry->object.vm_object;
2749 vm_offset_t old_offset = entry->offset;
2750
2751 entry->object = copy_entry->object;
2752 entry->offset = copy_entry->offset;
2753 entry->needs_copy = copy_entry->needs_copy;
2754 entry->wired_count = 0;
2755 entry->user_wired_count = 0;
2756
2757 vm_map_copy_entry_unlink(copy, copy_entry);
2758 vm_map_copy_entry_dispose(copy, copy_entry);
2759
2760 vm_object_pmap_protect(
2761 old_object,
2762 old_offset,
2763 size,
2764 dst_map->pmap,
2765 tmp_entry->vme_start,
2766 VM_PROT_NONE);
2767
2768 vm_object_deallocate(old_object);
2769
2770 /*
2771 * Set up for the next iteration. The map
2772 * has not been unlocked, so the next
2773 * address should be at the end of this
2774 * entry, and the next map entry should be
2775 * the one following it.
2776 */
2777
2778 start = tmp_entry->vme_end;
2779 tmp_entry = tmp_entry->vme_next;
2780 } else {
2781 vm_map_version_t version;
2782 vm_object_t dst_object = entry->object.vm_object;
2783 vm_offset_t dst_offset = entry->offset;
2784 kern_return_t r;
2785
2786 /*
2787 * Take an object reference, and record
2788 * the map version information so that the
2789 * map can be safely unlocked.
2790 */
2791
2792 vm_object_reference(dst_object);
2793
2794 version.main_timestamp = dst_map->timestamp;
2795
2796 vm_map_unlock(dst_map);
2797
2798 /*
2799 * Copy as much as possible in one pass
2800 */
2801
2802 copy_size = size;
2803 r = vm_fault_copy(
2804 copy_entry->object.vm_object,
2805 copy_entry->offset,
2806	&copy_size,
2807 dst_object,
2808 dst_offset,
2809 dst_map,
2810 &version,
2811 FALSE /* XXX interruptible */ );
2812
2813 /*
2814 * Release the object reference
2815 */
2816
2817 vm_object_deallocate(dst_object);
2818
2819 /*
2820 * If a hard error occurred, return it now
2821 */
2822
2823 if (r != KERN_SUCCESS)
2824 return(r);
2825
2826 if (copy_size != 0) {
2827 /*
2828 * Dispose of the copied region
2829 */
2830
2831 vm_map_copy_clip_end(copy, copy_entry,
2832 copy_entry->vme_start + copy_size);
2833 vm_map_copy_entry_unlink(copy, copy_entry);
2834 vm_object_deallocate(copy_entry->object.vm_object);
2835 vm_map_copy_entry_dispose(copy, copy_entry);
2836 }
2837
2838 /*
2839 * Pick up in the destination map where we left off.
2840 *
2841 * Use the version information to avoid a lookup
2842 * in the normal case.
2843 */
2844
2845 start += copy_size;
2846 vm_map_lock(dst_map);
2847 if ((version.main_timestamp + 1) == dst_map->timestamp) {
2848 /* We can safely use saved tmp_entry value */
2849
2850 vm_map_clip_end(dst_map, tmp_entry, start);
2851 tmp_entry = tmp_entry->vme_next;
2852 } else {
2853 /* Must do lookup of tmp_entry */
2854
2855 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
2856 vm_map_unlock(dst_map);
2857 return(KERN_INVALID_ADDRESS);
2858 }
2859 vm_map_clip_start(dst_map, tmp_entry, start);
2860 }
2861 }
2862
2863 }
2864 vm_map_unlock(dst_map);
2865
2866 /*
2867 * Throw away the vm_map_copy object
2868 */
2869 vm_map_copy_discard(copy);
2870
2871 return(KERN_SUCCESS);
2872 }
2873
2874 /*
2875 * Macro: vm_map_copy_insert
2876 *
2877 * Description:
2878 * Link a copy chain ("copy") into a map at the
2879 * specified location (after "where").
2880 * Side effects:
2881 * The copy chain is destroyed.
2882 * Warning:
2883 * The arguments are evaluated multiple times.
2884 */
2885 #define vm_map_copy_insert(map, where, copy) \
2886 MACRO_BEGIN \
2887 (((where)->vme_next)->vme_prev = vm_map_copy_last_entry(copy)) \
2888 ->vme_next = ((where)->vme_next); \
2889 ((where)->vme_next = vm_map_copy_first_entry(copy)) \
2890 ->vme_prev = (where); \
2891 (map)->hdr.nentries += (copy)->cpy_hdr.nentries; \
2892 zfree(vm_map_copy_zone, (vm_offset_t) copy); \
2893 MACRO_END
2894
2895 /*
2896 * Routine: vm_map_copyout
2897 *
2898 * Description:
2899 * Copy out a copy chain ("copy") into newly-allocated
2900 * space in the destination map.
2901 *
2902 * If successful, consumes the copy object.
2903 * Otherwise, the caller is responsible for it.
2904 */
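/*
 *	Illustrative sketch (hypothetical caller, not from this file;
 *	the maps and addresses are placeholders): pairing vm_map_copyin
 *	with vm_map_copyout copies a region into newly allocated space
 *	in another map; the copy object is consumed only on success.
 *
 *		kern_return_t	kr;
 *		vm_map_copy_t	copy;
 *		vm_offset_t	new_addr;
 *
 *		kr = vm_map_copyin(src_map, src_addr, size, FALSE, &copy);
 *		if (kr != KERN_SUCCESS)
 *			return(kr);
 *		kr = vm_map_copyout(dst_map, &new_addr, copy);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 *		return(kr);
 */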
2905 kern_return_t vm_map_copyout(dst_map, dst_addr, copy)
2906 register
2907 vm_map_t dst_map;
2908 vm_offset_t *dst_addr; /* OUT */
2909 register
2910 vm_map_copy_t copy;
2911 {
2912 vm_size_t size;
2913 vm_size_t adjustment;
2914 vm_offset_t start;
2915 vm_offset_t vm_copy_start;
2916 vm_map_entry_t last;
2917 register
2918 vm_map_entry_t entry;
2919
2920 /*
2921 * Check for null copy object.
2922 */
2923
2924 if (copy == VM_MAP_COPY_NULL) {
2925 *dst_addr = 0;
2926 return(KERN_SUCCESS);
2927 }
2928
2929 /*
2930 * Check for special copy object, created
2931 * by vm_map_copyin_object.
2932 */
2933
2934 if (copy->type == VM_MAP_COPY_OBJECT) {
2935 vm_object_t object = copy->cpy_object;
2936 vm_size_t offset = copy->offset;
2937 vm_size_t tmp_size = copy->size;
2938 kern_return_t kr;
2939
2940 *dst_addr = 0;
2941 kr = vm_map_enter(dst_map, dst_addr, tmp_size,
2942 (vm_offset_t) 0, TRUE,
2943 object, offset, FALSE,
2944 VM_PROT_DEFAULT, VM_PROT_ALL,
2945 VM_INHERIT_DEFAULT);
2946 if (kr != KERN_SUCCESS)
2947 return(kr);
2948 zfree(vm_map_copy_zone, (vm_offset_t) copy);
2949 return(KERN_SUCCESS);
2950 }
2951
2952 if (copy->type == VM_MAP_COPY_PAGE_LIST)
2953 return(vm_map_copyout_page_list(dst_map, dst_addr, copy));
2954
2955 /*
2956 * Find space for the data
2957 */
2958
2959 vm_copy_start = trunc_page(copy->offset);
2960 size = round_page(copy->offset + copy->size) - vm_copy_start;
2961
2962 StartAgain: ;
2963
2964 vm_map_lock(dst_map);
2965 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
2966 vm_map_min(dst_map) : last->vme_end;
2967
2968 while (TRUE) {
2969 vm_map_entry_t next = last->vme_next;
2970 vm_offset_t end = start + size;
2971
2972 if ((end > dst_map->max_offset) || (end < start)) {
2973 if (dst_map->wait_for_space) {
2974 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
2975 assert_wait((event_t) dst_map, TRUE);
2976 vm_map_unlock(dst_map);
2977 thread_block((void (*)()) 0);
2978 goto StartAgain;
2979 }
2980 }
2981 vm_map_unlock(dst_map);
2982 return(KERN_NO_SPACE);
2983 }
2984
2985 if ((next == vm_map_to_entry(dst_map)) ||
2986 (next->vme_start >= end))
2987 break;
2988
2989 last = next;
2990 start = last->vme_end;
2991 }
2992
2993 /*
2994 * Since we're going to just drop the map
2995 * entries from the copy into the destination
2996 * map, they must come from the same pool.
2997 */
2998
2999 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
3000 /*
3001 * Mismatches occur when dealing with the default
3002 * pager.
3003 */
3004 zone_t old_zone;
3005 vm_map_entry_t next, new;
3006
3007 /*
3008 * Find the zone that the copies were allocated from
3009 */
3010 old_zone = (copy->cpy_hdr.entries_pageable)
3011 ? vm_map_entry_zone
3012 : vm_map_kentry_zone;
3013 entry = vm_map_copy_first_entry(copy);
3014
3015 /*
3016 * Reinitialize the copy so that vm_map_copy_entry_link
3017 * will work.
3018 */
3019 copy->cpy_hdr.nentries = 0;
3020 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
3021 vm_map_copy_first_entry(copy) =
3022 vm_map_copy_last_entry(copy) =
3023 vm_map_copy_to_entry(copy);
3024
3025 /*
3026 * Copy each entry.
3027 */
3028 while (entry != vm_map_copy_to_entry(copy)) {
3029 new = vm_map_copy_entry_create(copy);
3030 vm_map_entry_copy_full(new, entry);
3031 vm_map_copy_entry_link(copy,
3032 vm_map_copy_last_entry(copy),
3033 new);
3034 next = entry->vme_next;
3035 zfree(old_zone, (vm_offset_t) entry);
3036 entry = next;
3037 }
3038 }
3039
3040 /*
3041 * Adjust the addresses in the copy chain, and
3042 * reset the region attributes.
3043 */
3044
3045 adjustment = start - vm_copy_start;
3046 for (entry = vm_map_copy_first_entry(copy);
3047 entry != vm_map_copy_to_entry(copy);
3048 entry = entry->vme_next) {
3049 entry->vme_start += adjustment;
3050 entry->vme_end += adjustment;
3051
3052 entry->inheritance = VM_INHERIT_DEFAULT;
3053 entry->protection = VM_PROT_DEFAULT;
3054 entry->max_protection = VM_PROT_ALL;
3055 entry->projected_on = 0;
3056
3057 /*
3058 * If the entry is now wired,
3059 * map the pages into the destination map.
3060 */
3061 if (entry->wired_count != 0) {
3062 register vm_offset_t va;
3063 vm_offset_t offset;
3064 register vm_object_t object;
3065
3066 object = entry->object.vm_object;
3067 offset = entry->offset;
3068 va = entry->vme_start;
3069
3070 pmap_pageable(dst_map->pmap,
3071 entry->vme_start,
3072 entry->vme_end,
3073 TRUE);
3074
3075 while (va < entry->vme_end) {
3076 register vm_page_t m;
3077
3078 /*
3079 * Look up the page in the object.
3080 * Assert that the page will be found in the
3081 * top object:
3082 * either
3083 * the object was newly created by
3084 * vm_object_copy_slowly, and has
3085 * copies of all of the pages from
3086 * the source object
3087 * or
3088 * the object was moved from the old
3089 * map entry; because the old map
3090 * entry was wired, all of the pages
3091 * were in the top-level object.
3092 * (XXX not true if we wire pages for
3093 * reading)
3094 */
3095 vm_object_lock(object);
3096 vm_object_paging_begin(object);
3097
3098 m = vm_page_lookup(object, offset);
3099 if (m == VM_PAGE_NULL || m->wire_count == 0 ||
3100 m->absent)
3101 panic("vm_map_copyout: wiring 0x%x", m);
3102
3103 m->busy = TRUE;
3104 vm_object_unlock(object);
3105
3106 PMAP_ENTER(dst_map->pmap, va, m,
3107 entry->protection, TRUE);
3108
3109 vm_object_lock(object);
3110 PAGE_WAKEUP_DONE(m);
3111 /* the page is wired, so we don't have to activate */
3112 vm_object_paging_end(object);
3113 vm_object_unlock(object);
3114
3115 offset += PAGE_SIZE;
3116 va += PAGE_SIZE;
3117 }
3118 }
3119
3120
3121 }
3122
3123 /*
3124 * Correct the page alignment for the result
3125 */
3126
3127 *dst_addr = start + (copy->offset - vm_copy_start);
3128
3129 /*
3130 * Update the hints and the map size
3131 */
3132
3133 if (dst_map->first_free == last)
3134 dst_map->first_free = vm_map_copy_last_entry(copy);
3135 SAVE_HINT(dst_map, vm_map_copy_last_entry(copy));
3136
3137 dst_map->size += size;
3138
3139 /*
3140 * Link in the copy
3141 */
3142
3143 vm_map_copy_insert(dst_map, last, copy);
3144
3145 vm_map_unlock(dst_map);
3146
3147 /*
3148 * XXX If wiring_required, call vm_map_pageable
3149 */
3150
3151 return(KERN_SUCCESS);
3152 }
3153
3154 /*
3155 *
3156 * vm_map_copyout_page_list:
3157 *
3158 * Version of vm_map_copyout() for page list vm map copies.
3159 *
3160 */
3161 kern_return_t vm_map_copyout_page_list(dst_map, dst_addr, copy)
3162 register
3163 vm_map_t dst_map;
3164 vm_offset_t *dst_addr; /* OUT */
3165 register
3166 vm_map_copy_t copy;
3167 {
3168 vm_size_t size;
3169 vm_offset_t start;
3170 vm_offset_t end;
3171 vm_offset_t offset;
3172 vm_map_entry_t last;
3173 register
3174 vm_object_t object;
3175 vm_page_t *page_list, m;
3176 vm_map_entry_t entry;
3177 vm_offset_t old_last_offset;
3178 boolean_t cont_invoked, needs_wakeup = FALSE;
3179 kern_return_t result = KERN_SUCCESS;
3180 vm_map_copy_t orig_copy;
3181 vm_offset_t dst_offset;
3182 boolean_t must_wire;
3183
3184 /*
3185 * Make sure the pages are stolen, because we are
3186 * going to put them in a new object. Assume that
3187	 * all pages are identical to the first in this regard.
3188 */
3189
3190	page_list = &copy->cpy_page_list[0];
3191 if ((*page_list)->tabled)
3192 vm_map_copy_steal_pages(copy);
3193
3194 /*
3195 * Find space for the data
3196 */
3197
3198 size = round_page(copy->offset + copy->size) -
3199 trunc_page(copy->offset);
3200 StartAgain:
3201 vm_map_lock(dst_map);
3202 must_wire = dst_map->wiring_required;
3203
3204 last = dst_map->first_free;
3205 if (last == vm_map_to_entry(dst_map)) {
3206 start = vm_map_min(dst_map);
3207 } else {
3208 start = last->vme_end;
3209 }
3210
3211 while (TRUE) {
3212 vm_map_entry_t next = last->vme_next;
3213 end = start + size;
3214
3215 if ((end > dst_map->max_offset) || (end < start)) {
3216 if (dst_map->wait_for_space) {
3217 if (size <= (dst_map->max_offset -
3218 dst_map->min_offset)) {
3219 assert_wait((event_t) dst_map, TRUE);
3220 vm_map_unlock(dst_map);
3221 thread_block((void (*)()) 0);
3222 goto StartAgain;
3223 }
3224 }
3225 vm_map_unlock(dst_map);
3226 return(KERN_NO_SPACE);
3227 }
3228
3229 if ((next == vm_map_to_entry(dst_map)) ||
3230 (next->vme_start >= end)) {
3231 break;
3232 }
3233
3234 last = next;
3235 start = last->vme_end;
3236 }
3237
3238 /*
3239 * See whether we can avoid creating a new entry (and object) by
3240 * extending one of our neighbors. [So far, we only attempt to
3241 * extend from below.]
3242 *
3243 * The code path below here is a bit twisted. If any of the
3244 * extension checks fails, we branch to create_object. If
3245 * it all works, we fall out the bottom and goto insert_pages.
3246 */
3247 if (last == vm_map_to_entry(dst_map) ||
3248 last->vme_end != start ||
3249 last->is_shared != FALSE ||
3250 last->is_sub_map != FALSE ||
3251 last->inheritance != VM_INHERIT_DEFAULT ||
3252 last->protection != VM_PROT_DEFAULT ||
3253 last->max_protection != VM_PROT_ALL ||
3254 (must_wire ? (last->wired_count != 1 ||
3255 last->user_wired_count != 1) :
3256 (last->wired_count != 0))) {
3257 goto create_object;
3258 }
3259
3260 /*
3261 * If this entry needs an object, make one.
3262 */
3263 if (last->object.vm_object == VM_OBJECT_NULL) {
3264 object = vm_object_allocate(
3265 (vm_size_t)(last->vme_end - last->vme_start + size));
3266 last->object.vm_object = object;
3267 last->offset = 0;
3268 vm_object_lock(object);
3269 }
3270 else {
3271 vm_offset_t prev_offset = last->offset;
3272 vm_size_t prev_size = start - last->vme_start;
3273 vm_size_t new_size;
3274
3275 /*
3276 * This is basically vm_object_coalesce.
3277 */
3278
3279 object = last->object.vm_object;
3280 vm_object_lock(object);
3281
3282 /*
3283 * Try to collapse the object first
3284 */
3285 vm_object_collapse(object);
3286
3287 /*
3288 * Can't coalesce if pages not mapped to
3289 * last may be in use anyway:
3290 * . more than one reference
3291 * . paged out
3292 * . shadows another object
3293 * . has a copy elsewhere
3294 * . paging references (pages might be in page-list)
3295 */
3296
3297 if ((object->ref_count > 1) ||
3298 object->pager_created ||
3299 (object->shadow != VM_OBJECT_NULL) ||
3300 (object->copy != VM_OBJECT_NULL) ||
3301 (object->paging_in_progress != 0)) {
3302 vm_object_unlock(object);
3303 goto create_object;
3304 }
3305
3306 /*
3307 * Extend the object if necessary. Don't have to call
3308 * vm_object_page_remove because the pages aren't mapped,
3309 * and vm_page_replace will free up any old ones it encounters.
3310 */
3311 new_size = prev_offset + prev_size + size;
3312 if (new_size > object->size)
3313 object->size = new_size;
3314 }
3315
3316 /*
3317 * Coalesced the two objects - can extend
3318 * the previous map entry to include the
3319 * new range.
3320 */
3321 dst_map->size += size;
3322 last->vme_end = end;
3323
3324 SAVE_HINT(dst_map, last);
3325
3326 goto insert_pages;
3327
3328 create_object:
3329
3330 /*
3331 * Create object
3332 */
3333 object = vm_object_allocate(size);
3334
3335 /*
3336 * Create entry
3337 */
3338
3339 entry = vm_map_entry_create(dst_map);
3340
3341 entry->object.vm_object = object;
3342 entry->offset = 0;
3343
3344 entry->is_shared = FALSE;
3345 entry->is_sub_map = FALSE;
3346 entry->needs_copy = FALSE;
3347
3348 if (must_wire) {
3349 entry->wired_count = 1;
3350 entry->user_wired_count = 1;
3351 } else {
3352 entry->wired_count = 0;
3353 entry->user_wired_count = 0;
3354 }
3355
3356 entry->in_transition = TRUE;
3357 entry->needs_wakeup = FALSE;
3358
3359 entry->vme_start = start;
3360 entry->vme_end = start + size;
3361
3362 entry->inheritance = VM_INHERIT_DEFAULT;
3363 entry->protection = VM_PROT_DEFAULT;
3364 entry->max_protection = VM_PROT_ALL;
3365 entry->projected_on = 0;
3366
3367 vm_object_lock(object);
3368
3369 /*
3370 * Update the hints and the map size
3371 */
3372 if (dst_map->first_free == last) {
3373 dst_map->first_free = entry;
3374 }
3375 SAVE_HINT(dst_map, entry);
3376 dst_map->size += size;
3377
3378 /*
3379 * Link in the entry
3380 */
3381 vm_map_entry_link(dst_map, last, entry);
3382 last = entry;
3383
3384 /*
3385 * Transfer pages into new object.
3386 * Scan page list in vm_map_copy.
3387 */
3388 insert_pages:
3389 dst_offset = copy->offset & page_mask;
3390 cont_invoked = FALSE;
3391 orig_copy = copy;
3392 last->in_transition = TRUE;
3393 old_last_offset = last->offset
3394 + (start - last->vme_start);
3395
3396 vm_page_lock_queues();
3397
3398 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3399 m = *page_list;
3400 assert(m && !m->tabled);
3401
3402 /*
3403 * Must clear busy bit in page before inserting it.
3404 * Ok to skip wakeup logic because nobody else
3405 * can possibly know about this page.
3406 * The page is dirty in its new object.
3407 */
3408
3409 m->busy = FALSE;
3410 m->dirty = TRUE;
3411 vm_page_replace(m, object, old_last_offset + offset);
3412 if (must_wire)
3413 vm_page_wire(m);
3414 else
3415 vm_page_activate(m);
3416
3417 *page_list++ = VM_PAGE_NULL;
3418 if (--(copy->cpy_npages) == 0 &&
3419 vm_map_copy_has_cont(copy)) {
3420 vm_map_copy_t new_copy;
3421
3422 /*
3423 * Ok to unlock map because entry is
3424 * marked in_transition.
3425 */
3426 cont_invoked = TRUE;
3427 vm_page_unlock_queues();
3428 vm_object_unlock(object);
3429 vm_map_unlock(dst_map);
3430 vm_map_copy_invoke_cont(copy, &new_copy, &result);
3431
3432 if (result == KERN_SUCCESS) {
3433
3434 /*
3435 * If we got back a copy with real pages,
3436 * steal them now. Either all of the
3437 * pages in the list are tabled or none
3438 * of them are; mixtures are not possible.
3439 *
3440 * Save original copy for consume on
3441 * success logic at end of routine.
3442 */
3443 if (copy != orig_copy)
3444 vm_map_copy_discard(copy);
3445
3446 if ((copy = new_copy) != VM_MAP_COPY_NULL) {
3447	page_list = &copy->cpy_page_list[0];
3448 if ((*page_list)->tabled)
3449 vm_map_copy_steal_pages(copy);
3450 }
3451 }
3452 else {
3453 /*
3454 * Continuation failed.
3455 */
3456 vm_map_lock(dst_map);
3457 goto error;
3458 }
3459
3460 vm_map_lock(dst_map);
3461 vm_object_lock(object);
3462 vm_page_lock_queues();
3463 }
3464 }
3465
3466 vm_page_unlock_queues();
3467 vm_object_unlock(object);
3468
3469 *dst_addr = start + dst_offset;
3470
3471 /*
3472 * Clear the in transition bits. This is easy if we
3473 * didn't have a continuation.
3474 */
3475 error:
3476 if (!cont_invoked) {
3477 /*
3478 * We didn't unlock the map, so nobody could
3479 * be waiting.
3480 */
3481 last->in_transition = FALSE;
3482 assert(!last->needs_wakeup);
3483 needs_wakeup = FALSE;
3484 }
3485 else {
3486 if (!vm_map_lookup_entry(dst_map, start, &entry))
3487 panic("vm_map_copyout_page_list: missing entry");
3488
3489 /*
3490 * Clear transition bit for all constituent entries that
3491 * were in the original entry. Also check for waiters.
3492 */
3493 while((entry != vm_map_to_entry(dst_map)) &&
3494 (entry->vme_start < end)) {
3495 assert(entry->in_transition);
3496 entry->in_transition = FALSE;
3497 if(entry->needs_wakeup) {
3498 entry->needs_wakeup = FALSE;
3499 needs_wakeup = TRUE;
3500 }
3501 entry = entry->vme_next;
3502 }
3503 }
3504
3505 if (result != KERN_SUCCESS)
3506 vm_map_delete(dst_map, start, end);
3507
3508 vm_map_unlock(dst_map);
3509
3510 if (needs_wakeup)
3511 vm_map_entry_wakeup(dst_map);
3512
3513 /*
3514 * Consume on success logic.
3515 */
3516 if (copy != orig_copy) {
3517 zfree(vm_map_copy_zone, (vm_offset_t) copy);
3518 }
3519 if (result == KERN_SUCCESS) {
3520 zfree(vm_map_copy_zone, (vm_offset_t) orig_copy);
3521 }
3522
3523 return(result);
3524 }
3525
3526 /*
3527 * Routine: vm_map_copyin
3528 *
3529 * Description:
3530 * Copy the specified region (src_addr, len) from the
3531 * source address space (src_map), possibly removing
3532 * the region from the source address space (src_destroy).
3533 *
3534 * Returns:
3535 * A vm_map_copy_t object (copy_result), suitable for
3536 * insertion into another address space (using vm_map_copyout),
3537 * copying over another address space region (using
3538 * vm_map_copy_overwrite). If the copy is unused, it
3539 * should be destroyed (using vm_map_copy_discard).
3540 *
3541 * In/out conditions:
3542 * The source map should not be locked on entry.
3543 */
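/*
 *	Illustrative sketch (hypothetical caller; "old_map", "new_map",
 *	"addr" and "size" are placeholders): with src_destroy TRUE the
 *	source range is deleted once the copy has been made, so a
 *	copyin/copyout pair behaves as a move between two maps.
 *
 *		kr = vm_map_copyin(old_map, addr, size, TRUE, &copy);
 *		if (kr != KERN_SUCCESS)
 *			return(kr);
 *		kr = vm_map_copyout(new_map, &new_addr, copy);
 *		if (kr != KERN_SUCCESS)
 *			vm_map_copy_discard(copy);
 */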
3544 kern_return_t vm_map_copyin(src_map, src_addr, len, src_destroy, copy_result)
3545 vm_map_t src_map;
3546 vm_offset_t src_addr;
3547 vm_size_t len;
3548 boolean_t src_destroy;
3549 vm_map_copy_t *copy_result; /* OUT */
3550 {
3551 vm_map_entry_t tmp_entry; /* Result of last map lookup --
3552 * in multi-level lookup, this
3553 * entry contains the actual
3554 * vm_object/offset.
3555 */
3556
3557 vm_offset_t src_start; /* Start of current entry --
3558 * where copy is taking place now
3559 */
3560 vm_offset_t src_end; /* End of entire region to be
3561 * copied */
3562
3563 register
3564 vm_map_copy_t copy; /* Resulting copy */
3565
3566 /*
3567 * Check for copies of zero bytes.
3568 */
3569
3570 if (len == 0) {
3571 *copy_result = VM_MAP_COPY_NULL;
3572 return(KERN_SUCCESS);
3573 }
3574
3575 /*
3576 * Compute start and end of region
3577 */
3578
3579 src_start = trunc_page(src_addr);
3580 src_end = round_page(src_addr + len);
3581
3582 /*
3583 * Check that the end address doesn't overflow
3584 */
3585
3586 if (src_end <= src_start)
3587 if ((src_end < src_start) || (src_start != 0))
3588 return(KERN_INVALID_ADDRESS);
3589
3590 /*
3591 * Allocate a header element for the list.
3592 *
3593 * Use the start and end in the header to
3594 * remember the endpoints prior to rounding.
3595 */
3596
3597 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
3598 vm_map_copy_first_entry(copy) =
3599 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
3600 copy->type = VM_MAP_COPY_ENTRY_LIST;
3601 copy->cpy_hdr.nentries = 0;
3602 copy->cpy_hdr.entries_pageable = TRUE;
3603
3604 copy->offset = src_addr;
3605 copy->size = len;
3606
3607 #define RETURN(x) \
3608 MACRO_BEGIN \
3609 vm_map_unlock(src_map); \
3610 vm_map_copy_discard(copy); \
3611 MACRO_RETURN(x); \
3612 MACRO_END
3613
3614 /*
3615 * Find the beginning of the region.
3616 */
3617
3618 vm_map_lock(src_map);
3619
3620 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
3621 RETURN(KERN_INVALID_ADDRESS);
3622 vm_map_clip_start(src_map, tmp_entry, src_start);
3623
3624 /*
3625 * Go through entries until we get to the end.
3626 */
3627
3628 while (TRUE) {
3629 register
3630 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
3631 vm_size_t src_size; /* Size of source
3632 * map entry (in both
3633 * maps)
3634 */
3635
3636 register
3637 vm_object_t src_object; /* Object to copy */
3638 vm_offset_t src_offset;
3639
3640 boolean_t src_needs_copy; /* Should source map
3641 * be made read-only
3642 * for copy-on-write?
3643 */
3644
3645 register
3646 vm_map_entry_t new_entry; /* Map entry for copy */
3647 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
3648
3649 boolean_t was_wired; /* Was source wired? */
3650 vm_map_version_t version; /* Version before locks
3651 * dropped to make copy
3652 */
3653
3654 /*
3655 * Verify that the region can be read.
3656 */
3657
3658 if (! (src_entry->protection & VM_PROT_READ))
3659 RETURN(KERN_PROTECTION_FAILURE);
3660
3661 /*
3662 * Clip against the endpoints of the entire region.
3663 */
3664
3665 vm_map_clip_end(src_map, src_entry, src_end);
3666
3667 src_size = src_entry->vme_end - src_start;
3668 src_object = src_entry->object.vm_object;
3669 src_offset = src_entry->offset;
3670 was_wired = (src_entry->wired_count != 0);
3671
3672 /*
3673 * Create a new address map entry to
3674 * hold the result. Fill in the fields from
3675 * the appropriate source entries.
3676 */
3677
3678 new_entry = vm_map_copy_entry_create(copy);
3679 vm_map_entry_copy(new_entry, src_entry);
3680
3681 /*
3682 * Attempt non-blocking copy-on-write optimizations.
3683 */
3684
3685 if (src_destroy &&
3686 (src_object == VM_OBJECT_NULL || src_object->temporary)) {
3687 /*
3688 * If we are destroying the source, and the object
3689 * is temporary, we can move the object reference
3690 * from the source to the copy. The copy is
3691 * copy-on-write only if the source is.
3692 * We make another reference to the object, because
3693 * destroying the source entry will deallocate it.
3694 */
3695 vm_object_reference(src_object);
3696
3697 /*
3698	 * Copy is always unwired; vm_map_entry_copy (above)
3699	 * set its wired count to zero.
3700 */
3701
3702 goto CopySuccessful;
3703 }
3704
3705 if (!was_wired &&
3706 vm_object_copy_temporary(
3707 &new_entry->object.vm_object,
3708 &new_entry->offset,
3709 &src_needs_copy,
3710 &new_entry_needs_copy)) {
3711
3712 new_entry->needs_copy = new_entry_needs_copy;
3713
3714 /*
3715 * Handle copy-on-write obligations
3716 */
3717
3718 if (src_needs_copy && !tmp_entry->needs_copy) {
3719 vm_object_pmap_protect(
3720 src_object,
3721 src_offset,
3722 src_size,
3723 (src_entry->is_shared ? PMAP_NULL
3724 : src_map->pmap),
3725 src_entry->vme_start,
3726 src_entry->protection &
3727 ~VM_PROT_WRITE);
3728
3729 tmp_entry->needs_copy = TRUE;
3730 }
3731
3732 /*
3733 * The map has never been unlocked, so it's safe to
3734 * move to the next entry rather than doing another
3735 * lookup.
3736 */
3737
3738 goto CopySuccessful;
3739 }
3740
3741 new_entry->needs_copy = FALSE;
3742
3743 /*
3744 * Take an object reference, so that we may
3745 * release the map lock(s).
3746 */
3747
3748 assert(src_object != VM_OBJECT_NULL);
3749 vm_object_reference(src_object);
3750
3751 /*
3752 * Record the timestamp for later verification.
3753 * Unlock the map.
3754 */
3755
3756 version.main_timestamp = src_map->timestamp;
3757 vm_map_unlock(src_map);
3758
3759 /*
3760 * Perform the copy
3761 */
3762
3763 if (was_wired) {
3764 vm_object_lock(src_object);
3765 (void) vm_object_copy_slowly(
3766 src_object,
3767 src_offset,
3768 src_size,
3769 FALSE,
3770 &new_entry->object.vm_object);
3771 new_entry->offset = 0;
3772 new_entry->needs_copy = FALSE;
3773 } else {
3774 kern_return_t result;
3775
3776 result = vm_object_copy_strategically(src_object,
3777 src_offset,
3778 src_size,
3779 &new_entry->object.vm_object,
3780 &new_entry->offset,
3781 &new_entry_needs_copy);
3782
3783 new_entry->needs_copy = new_entry_needs_copy;
3784
3785
3786 if (result != KERN_SUCCESS) {
3787 vm_map_copy_entry_dispose(copy, new_entry);
3788
3789 vm_map_lock(src_map);
3790 RETURN(result);
3791 }
3792
3793 }
3794
3795 /*
3796 * Throw away the extra reference
3797 */
3798
3799 vm_object_deallocate(src_object);
3800
3801 /*
3802 * Verify that the map has not substantially
3803 * changed while the copy was being made.
3804 */
3805
3806 vm_map_lock(src_map); /* Increments timestamp once! */
3807
3808 if ((version.main_timestamp + 1) == src_map->timestamp)
3809 goto CopySuccessful;
3810
3811 /*
3812 * Simple version comparison failed.
3813 *
3814 * Retry the lookup and verify that the
3815 * same object/offset are still present.
3816 *
3817 * [Note: a memory manager that colludes with
3818 * the calling task can detect that we have
3819 * cheated. While the map was unlocked, the
3820 * mapping could have been changed and restored.]
3821 */
3822
3823 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
3824 vm_map_copy_entry_dispose(copy, new_entry);
3825 RETURN(KERN_INVALID_ADDRESS);
3826 }
3827
3828 src_entry = tmp_entry;
3829 vm_map_clip_start(src_map, src_entry, src_start);
3830
3831 if ((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE)
3832 goto VerificationFailed;
3833
3834 if (src_entry->vme_end < new_entry->vme_end)
3835 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
3836
3837 if ((src_entry->object.vm_object != src_object) ||
3838 (src_entry->offset != src_offset) ) {
3839
3840 /*
3841 * Verification failed.
3842 *
3843 * Start over with this top-level entry.
3844 */
3845
3846 VerificationFailed: ;
3847
3848 vm_object_deallocate(new_entry->object.vm_object);
3849 vm_map_copy_entry_dispose(copy, new_entry);
3850 tmp_entry = src_entry;
3851 continue;
3852 }
3853
3854 /*
3855 * Verification succeeded.
3856 */
3857
3858 CopySuccessful: ;
3859
3860 /*
3861 * Link in the new copy entry.
3862 */
3863
3864 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
3865 new_entry);
3866
3867 /*
3868 * Determine whether the entire region
3869 * has been copied.
3870 */
3871 src_start = new_entry->vme_end;
3872 if ((src_start >= src_end) && (src_end != 0))
3873 break;
3874
3875 /*
3876 * Verify that there are no gaps in the region
3877 */
3878
3879 tmp_entry = src_entry->vme_next;
3880 if (tmp_entry->vme_start != src_start)
3881 RETURN(KERN_INVALID_ADDRESS);
3882 }
3883
3884 /*
3885 * If the source should be destroyed, do it now, since the
3886 * copy was successful.
3887 */
3888 if (src_destroy)
3889 (void) vm_map_delete(src_map, trunc_page(src_addr), src_end);
3890
3891 vm_map_unlock(src_map);
3892
3893 *copy_result = copy;
3894 return(KERN_SUCCESS);
3895
3896 #undef RETURN
3897 }
3898
3899 /*
3900 * vm_map_copyin_object:
3901 *
3902 * Create a copy object from an object.
3903 * Our caller donates an object reference.
3904 */
3905
3906 kern_return_t vm_map_copyin_object(object, offset, size, copy_result)
3907 vm_object_t object;
3908 vm_offset_t offset; /* offset of region in object */
3909 vm_size_t size; /* size of region in object */
3910 vm_map_copy_t *copy_result; /* OUT */
3911 {
3912 vm_map_copy_t copy; /* Resulting copy */
3913
3914 /*
3915 * We drop the object into a special copy object
3916 * that contains the object directly. These copy objects
3917 * are distinguished by entries_pageable == FALSE
3918 * and null links.
3919 */
3920
3921 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
3922 vm_map_copy_first_entry(copy) =
3923 vm_map_copy_last_entry(copy) = VM_MAP_ENTRY_NULL;
3924 copy->type = VM_MAP_COPY_OBJECT;
3925 copy->cpy_object = object;
3926 copy->offset = offset;
3927 copy->size = size;
3928
3929 *copy_result = copy;
3930 return(KERN_SUCCESS);
3931 }
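
/*
 *	Illustrative sketch (hypothetical caller; "object", "offset" and
 *	"size" are placeholders): the object reference donated to
 *	vm_map_copyin_object is typically taken just before the call, and
 *	the resulting copy object can then be handed to vm_map_copyout.
 *
 *		vm_object_reference(object);
 *		kr = vm_map_copyin_object(object, offset, size, &copy);
 */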
3932
3933 /*
3934 * vm_map_copyin_page_list_cont:
3935 *
3936 * Continuation routine for vm_map_copyin_page_list.
3937 *
3938 * If vm_map_copyin_page_list can't fit the entire vm range
3939 * into a single page list object, it creates a continuation.
3940 * When the target of the operation has used the pages in the
3941 * initial page list, it invokes the continuation, which calls
3942	 * this routine. If an error happens, the continuation is aborted
3943	 * (it is invoked with a null copy_result). To avoid deadlocks, the
3944 * pages are discarded from the initial page list before invoking
3945 * the continuation.
3946 *
3947 * NOTE: This is not the same sort of continuation used by
3948 * the scheduler.
3949 */
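/*
 *	Illustrative sketch (hedged outline of the consumer side): a
 *	recipient of a page-list copy drains the current batch of pages
 *	and, when they are used up, invokes the continuation (which ends
 *	up calling this routine) to obtain the next batch.
 *	vm_map_copyout_page_list (above) follows this pattern.
 *
 *		if (copy->cpy_npages == 0 &&
 *		    vm_map_copy_has_cont(copy)) {
 *			vm_map_copy_invoke_cont(copy, &new_copy, &kr);
 *			if (kr == KERN_SUCCESS)
 *				copy = new_copy;
 *		}
 */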
3950
3951 kern_return_t vm_map_copyin_page_list_cont(cont_args, copy_result)
3952 vm_map_copyin_args_t cont_args;
3953 vm_map_copy_t *copy_result; /* OUT */
3954 {
3955 kern_return_t result = 0; /* '=0' to quiet gcc warnings */
3956 register boolean_t do_abort, src_destroy, src_destroy_only;
3957
3958 /*
3959 * Check for cases that only require memory destruction.
3960 */
3961 do_abort = (copy_result == (vm_map_copy_t *) 0);
3962 src_destroy = (cont_args->destroy_len != (vm_size_t) 0);
3963 src_destroy_only = (cont_args->src_len == (vm_size_t) 0);
3964
3965 if (do_abort || src_destroy_only) {
3966 if (src_destroy)
3967 result = vm_map_remove(cont_args->map,
3968 cont_args->destroy_addr,
3969 cont_args->destroy_addr + cont_args->destroy_len);
3970 if (!do_abort)
3971 *copy_result = VM_MAP_COPY_NULL;
3972 }
3973 else {
3974 result = vm_map_copyin_page_list(cont_args->map,
3975 cont_args->src_addr, cont_args->src_len, src_destroy,
3976 cont_args->steal_pages, copy_result, TRUE);
3977
3978 if (src_destroy && !cont_args->steal_pages &&
3979 vm_map_copy_has_cont(*copy_result)) {
3980 vm_map_copyin_args_t new_args;
3981 /*
3982 * Transfer old destroy info.
3983 */
3984 new_args = (vm_map_copyin_args_t)
3985 (*copy_result)->cpy_cont_args;
3986 new_args->destroy_addr = cont_args->destroy_addr;
3987 new_args->destroy_len = cont_args->destroy_len;
3988 }
3989 }
3990
3991 vm_map_deallocate(cont_args->map);
3992 kfree((vm_offset_t)cont_args, sizeof(vm_map_copyin_args_data_t));
3993
3994 return(result);
3995 }
3996
3997 /*
3998 * vm_map_copyin_page_list:
3999 *
4000 * This is a variant of vm_map_copyin that copies in a list of pages.
4001 * If steal_pages is TRUE, the pages are only in the returned list.
4002 * If steal_pages is FALSE, the pages are busy and still in their
4003 * objects. A continuation may be returned if not all the pages fit:
4004 * the recipient of this copy_result must be prepared to deal with it.
4005 */
4006
4007 kern_return_t vm_map_copyin_page_list(src_map, src_addr, len, src_destroy,
4008 steal_pages, copy_result, is_cont)
4009 vm_map_t src_map;
4010 vm_offset_t src_addr;
4011 vm_size_t len;
4012 boolean_t src_destroy;
4013 boolean_t steal_pages;
4014 vm_map_copy_t *copy_result; /* OUT */
4015 boolean_t is_cont;
4016 {
4017 vm_map_entry_t src_entry;
4018 vm_page_t m;
4019 vm_offset_t src_start;
4020 vm_offset_t src_end;
4021 vm_size_t src_size;
4022 register
4023 vm_object_t src_object;
4024 register
4025 vm_offset_t src_offset;
4026 vm_offset_t src_last_offset;
4027 register
4028 vm_map_copy_t copy; /* Resulting copy */
4029 kern_return_t result = KERN_SUCCESS;
4030 boolean_t need_map_lookup;
4031 vm_map_copyin_args_t cont_args;
4032
4033 /*
4034 * If steal_pages is FALSE, this leaves busy pages in
4035 * the object. A continuation must be used if src_destroy
4036 * is true in this case (!steal_pages && src_destroy).
4037 *
4038 * XXX Still have a more general problem of what happens
4039 * XXX if the same page occurs twice in a list. Deadlock
4040 * XXX can happen if vm_fault_page was called. A
4041 * XXX possible solution is to use a continuation if vm_fault_page
4042 * XXX is called and we cross a map entry boundary.
4043 */
4044
4045 /*
4046 * Check for copies of zero bytes.
4047 */
4048
4049 if (len == 0) {
4050 *copy_result = VM_MAP_COPY_NULL;
4051 return(KERN_SUCCESS);
4052 }
4053
4054 /*
4055 * Compute start and end of region
4056 */
4057
4058 src_start = trunc_page(src_addr);
4059 src_end = round_page(src_addr + len);
4060
4061 /*
4062 * Check that the end address doesn't overflow
4063 */
4064
4065 if (src_end <= src_start && (src_end < src_start || src_start != 0)) {
4066 return KERN_INVALID_ADDRESS;
4067 }
4068
4069 /*
4070 * Allocate a header element for the page list.
4071 *
4072 * Record original offset and size, as caller may not
4073 * be page-aligned.
4074 */
4075
4076 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
4077 copy->type = VM_MAP_COPY_PAGE_LIST;
4078 copy->cpy_npages = 0;
4079 copy->offset = src_addr;
4080 copy->size = len;
4081 copy->cpy_cont = ((kern_return_t (*)()) 0);
4082 copy->cpy_cont_args = (char *) VM_MAP_COPYIN_ARGS_NULL;
4083
4084 /*
4085 * Find the beginning of the region.
4086 */
4087
4088 do_map_lookup:
4089
4090 vm_map_lock(src_map);
4091
4092 if (!vm_map_lookup_entry(src_map, src_start, &src_entry)) {
4093 result = KERN_INVALID_ADDRESS;
4094 goto error;
4095 }
4096 need_map_lookup = FALSE;
4097
4098 /*
4099 * Go through entries until we get to the end.
4100 */
4101
4102 while (TRUE) {
4103
4104 if (! (src_entry->protection & VM_PROT_READ)) {
4105 result = KERN_PROTECTION_FAILURE;
4106 goto error;
4107 }
4108
4109 if (src_end > src_entry->vme_end)
4110 src_size = src_entry->vme_end - src_start;
4111 else
4112 src_size = src_end - src_start;
4113
4114 src_object = src_entry->object.vm_object;
4115 src_offset = src_entry->offset +
4116 (src_start - src_entry->vme_start);
4117
4118 /*
4119 * If src_object is NULL, allocate it now;
4120 * we're going to fault on it shortly.
4121 */
4122 if (src_object == VM_OBJECT_NULL) {
4123 src_object = vm_object_allocate((vm_size_t)
4124 src_entry->vme_end -
4125 src_entry->vme_start);
4126 src_entry->object.vm_object = src_object;
4127 }
4128
4129 /*
4130 * Iterate over pages. Fault in ones that aren't present.
4131 */
4132 src_last_offset = src_offset + src_size;
4133 for (; (src_offset < src_last_offset && !need_map_lookup);
4134 src_offset += PAGE_SIZE, src_start += PAGE_SIZE) {
4135
4136 if (copy->cpy_npages == VM_MAP_COPY_PAGE_LIST_MAX) {
4137 make_continuation:
4138 /*
4139 * At this point we have the max number of
4140 * pages busy for this thread that we're
4141 * willing to allow. Stop here and record
4142 * arguments for the remainder. Note:
4143 * this means that this routine isn't atomic,
4144 * but that's the breaks. Note that only
4145 * the first vm_map_copy_t that comes back
4146 * from this routine has the right offset
4147 * and size; those from continuations are
4148 * page rounded, and short by the amount
4149 * already done.
4150 *
4151 * Reset src_end so the src_destroy
4152 * code at the bottom doesn't do
4153 * something stupid.
4154 */
4155
4156 cont_args = (vm_map_copyin_args_t)
4157 kalloc(sizeof(vm_map_copyin_args_data_t));
4158 cont_args->map = src_map;
4159 vm_map_reference(src_map);
4160 cont_args->src_addr = src_start;
4161 cont_args->src_len = len - (src_start - src_addr);
4162 if (src_destroy) {
4163 cont_args->destroy_addr = cont_args->src_addr;
4164 cont_args->destroy_len = cont_args->src_len;
4165 }
4166 else {
4167 cont_args->destroy_addr = (vm_offset_t) 0;
4168 cont_args->destroy_len = (vm_offset_t) 0;
4169 }
4170 cont_args->steal_pages = steal_pages;
4171
4172 copy->cpy_cont_args = (char *) cont_args;
4173 copy->cpy_cont = vm_map_copyin_page_list_cont;
4174
4175 src_end = src_start;
4176 vm_map_clip_end(src_map, src_entry, src_end);
4177 break;
4178 }
4179
4180 /*
4181 * Try to find the page of data.
4182 */
4183 vm_object_lock(src_object);
4184 vm_object_paging_begin(src_object);
4185 if (((m = vm_page_lookup(src_object, src_offset)) !=
4186 VM_PAGE_NULL) && !m->busy && !m->fictitious &&
4187 !m->absent && !m->error) {
4188
4189 /*
4190 * This is the page. Mark it busy
4191 * and keep the paging reference on
4192 * the object whilst we do our thing.
4193 */
4194 m->busy = TRUE;
4195 }
4196 else {
4197 vm_prot_t result_prot;
4198 vm_page_t top_page;
4199 kern_return_t kr;
4200
4201 /*
4202 * Have to fault the page in; must
4203 * unlock the map to do so. While
4204 * the map is unlocked, anything
4205 				 * can happen, so we must look up the
4206 				 * map entry again before continuing.
4207 */
4208 vm_map_unlock(src_map);
4209 need_map_lookup = TRUE;
4210 retry:
4211 result_prot = VM_PROT_READ;
4212
4213 kr = vm_fault_page(src_object, src_offset,
4214 VM_PROT_READ, FALSE, FALSE,
4215 &result_prot, &m, &top_page,
4216 FALSE, (void (*)()) 0);
4217 /*
4218 * Cope with what happened.
4219 */
4220 switch (kr) {
4221 case VM_FAULT_SUCCESS:
4222 break;
4223 case VM_FAULT_INTERRUPTED: /* ??? */
4224 case VM_FAULT_RETRY:
4225 vm_object_lock(src_object);
4226 vm_object_paging_begin(src_object);
4227 goto retry;
4228 case VM_FAULT_MEMORY_SHORTAGE:
4229 VM_PAGE_WAIT((void (*)()) 0);
4230 vm_object_lock(src_object);
4231 vm_object_paging_begin(src_object);
4232 goto retry;
4233 case VM_FAULT_FICTITIOUS_SHORTAGE:
4234 vm_page_more_fictitious();
4235 vm_object_lock(src_object);
4236 vm_object_paging_begin(src_object);
4237 goto retry;
4238 case VM_FAULT_MEMORY_ERROR:
4239 /*
4240 * Something broke. If this
4241 * is a continuation, return
4242 * a partial result if possible,
4243 * else fail the whole thing.
4244 * In the continuation case, the
4245 * next continuation call will
4246 * get this error if it persists.
4247 */
4248 vm_map_lock(src_map);
4249 if (is_cont &&
4250 copy->cpy_npages != 0)
4251 goto make_continuation;
4252
4253 result = KERN_MEMORY_ERROR;
4254 goto error;
4255 }
4256
4257 if (top_page != VM_PAGE_NULL) {
4258 vm_object_lock(src_object);
4259 VM_PAGE_FREE(top_page);
4260 vm_object_paging_end(src_object);
4261 vm_object_unlock(src_object);
4262 }
4263
4264 }
4265
4266 /*
4267 * The page is busy, its object is locked, and
4268 * we have a paging reference on it. Either
4269 * the map is locked, or need_map_lookup is
4270 * TRUE.
4271 *
4272 * Put the page in the page list.
4273 */
4274 copy->cpy_page_list[copy->cpy_npages++] = m;
4275 vm_object_unlock(m->object);
4276 }
4277
4278 /*
4279 * DETERMINE whether the entire region
4280 * has been copied.
4281 */
4282 if (src_start >= src_end && src_end != 0) {
4283 if (need_map_lookup)
4284 vm_map_lock(src_map);
4285 break;
4286 }
4287
4288 /*
4289 * If need_map_lookup is TRUE, have to start over with
4290 * another map lookup. Note that we dropped the map
4291 * lock (to call vm_fault_page) above only in this case.
4292 */
4293 if (need_map_lookup)
4294 goto do_map_lookup;
4295
4296 /*
4297 * Verify that there are no gaps in the region
4298 */
4299
4300 src_start = src_entry->vme_end;
4301 src_entry = src_entry->vme_next;
4302 if (src_entry->vme_start != src_start) {
4303 result = KERN_INVALID_ADDRESS;
4304 goto error;
4305 }
4306 }
4307
4308 /*
4309 * If steal_pages is true, make sure all
4310 	 * pages in the copy are not in any object.
4311 * We try to remove them from the original
4312 * object, but we may have to copy them.
4313 *
4314 * At this point every page in the list is busy
4315 * and holds a paging reference to its object.
4316 * When we're done stealing, every page is busy,
4317 * and in no object (m->tabled == FALSE).
4318 */
4319 src_start = trunc_page(src_addr);
4320 if (steal_pages) {
4321 register int i;
4322 vm_offset_t unwire_end;
4323
4324 unwire_end = src_start;
4325 for (i = 0; i < copy->cpy_npages; i++) {
4326
4327 /*
4328 * Remove the page from its object if it
4329 * can be stolen. It can be stolen if:
4330 *
4331 * (1) The source is being destroyed,
4332 * the object is temporary, and
4333 * not shared.
4334 * (2) The page is not precious.
4335 *
4336 * The not shared check consists of two
4337 * parts: (a) there are no objects that
4338 * shadow this object. (b) it is not the
4339 * object in any shared map entries (i.e.,
4340 * use_shared_copy is not set).
4341 *
4342 * The first check (a) means that we can't
4343 * steal pages from objects that are not
4344 * at the top of their shadow chains. This
4345 * should not be a frequent occurrence.
4346 *
4347 * Stealing wired pages requires telling the
4348 * pmap module to let go of them.
4349 *
4350 * NOTE: stealing clean pages from objects
4351 * whose mappings survive requires a call to
4352 * the pmap module. Maybe later.
4353 */
4354 m = copy->cpy_page_list[i];
4355 src_object = m->object;
4356 vm_object_lock(src_object);
4357
4358 if (src_destroy &&
4359 src_object->temporary &&
4360 (!src_object->shadowed) &&
4361 (!src_object->use_shared_copy) &&
4362 !m->precious) {
4363 vm_offset_t page_vaddr;
4364
4365 page_vaddr = src_start + (i * PAGE_SIZE);
4366 if (m->wire_count > 0) {
4367
4368 assert(m->wire_count == 1);
4369 /*
4370 * In order to steal a wired
4371 * page, we have to unwire it
4372 * first. We do this inline
4373 * here because we have the page.
4374 *
4375 * Step 1: Unwire the map entry.
4376 * Also tell the pmap module
4377 * that this piece of the
4378 * pmap is pageable.
4379 */
4380 vm_object_unlock(src_object);
4381 if (page_vaddr >= unwire_end) {
4382 if (!vm_map_lookup_entry(src_map,
4383 page_vaddr, &src_entry))
4384 panic("vm_map_copyin_page_list: missing wired map entry");
4385
4386 vm_map_clip_start(src_map, src_entry,
4387 page_vaddr);
4388 vm_map_clip_end(src_map, src_entry,
4389 src_start + src_size);
4390
4391 assert(src_entry->wired_count > 0);
4392 src_entry->wired_count = 0;
4393 src_entry->user_wired_count = 0;
4394 unwire_end = src_entry->vme_end;
4395 pmap_pageable(vm_map_pmap(src_map),
4396 page_vaddr, unwire_end, TRUE);
4397 }
4398
4399 /*
4400 * Step 2: Unwire the page.
4401 * pmap_remove handles this for us.
4402 */
4403 vm_object_lock(src_object);
4404 }
4405
4406 /*
4407 * Don't need to remove the mapping;
4408 * vm_map_delete will handle it.
4409 *
4410 * Steal the page. Setting the wire count
4411 * to zero is vm_page_unwire without
4412 * activating the page.
4413 */
4414 vm_page_lock_queues();
4415 vm_page_remove(m);
4416 if (m->wire_count > 0) {
4417 m->wire_count = 0;
4418 vm_page_wire_count--;
4419 } else {
4420 VM_PAGE_QUEUES_REMOVE(m);
4421 }
4422 vm_page_unlock_queues();
4423 }
4424 else {
4425 /*
4426 			 * Have to copy this page.  Copying
4427 			 * requires unlocking the map, and once
4428 			 * the map has been unlocked no further
4429 			 * pages can be stolen safely, so give
4430 			 * up on stealing and just copy all of
4431 			 * the remaining pages.
4432 */
4433 vm_object_unlock(src_object);
4434 vm_map_unlock(src_map);
4435
4436 vm_map_copy_steal_pages(copy);
4437
4438 vm_map_lock(src_map);
4439 break;
4440 }
4441
4442 vm_object_paging_end(src_object);
4443 vm_object_unlock(src_object);
4444 }
4445
4446 /*
4447 * If the source should be destroyed, do it now, since the
4448 * copy was successful.
4449 */
4450
4451 if (src_destroy) {
4452 (void) vm_map_delete(src_map, src_start, src_end);
4453 }
4454 }
4455 else {
4456 /*
4457 * !steal_pages leaves busy pages in the map.
4458 * This will cause src_destroy to hang. Use
4459 * a continuation to prevent this.
4460 */
4461 if (src_destroy && !vm_map_copy_has_cont(copy)) {
4462 cont_args = (vm_map_copyin_args_t)
4463 kalloc(sizeof(vm_map_copyin_args_data_t));
4464 vm_map_reference(src_map);
4465 cont_args->map = src_map;
4466 cont_args->src_addr = (vm_offset_t) 0;
4467 cont_args->src_len = (vm_size_t) 0;
4468 cont_args->destroy_addr = src_start;
4469 cont_args->destroy_len = src_end - src_start;
4470 cont_args->steal_pages = FALSE;
4471
4472 copy->cpy_cont_args = (char *) cont_args;
4473 copy->cpy_cont = vm_map_copyin_page_list_cont;
4474 }
4475
4476 }
4477
4478 vm_map_unlock(src_map);
4479
4480 *copy_result = copy;
4481 return(result);
4482
4483 error:
4484 vm_map_unlock(src_map);
4485 vm_map_copy_discard(copy);
4486 return(result);
4487 }
4488
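/*
 *	Illustrative sketch (not part of the original vm_map.c): the
 *	caller-side contract for the page-list continuations produced by
 *	vm_map_copyin_page_list above.  Test whether a continuation is
 *	pending (vm_map_copy_has_cont), then either call through
 *	cpy_cont/cpy_cont_args to obtain the next batch of pages, or pass
 *	a null copy_result to abort, as described in the header comment of
 *	vm_map_copyin_page_list_cont.  example_use_and_release_pages() is
 *	a hypothetical helper standing in for whatever the recipient does
 *	with the busy pages; disposal of the drained copy structure itself
 *	is glossed over here.
 */
#if 0	/* illustrative only */
kern_return_t example_copyin_all_pages(map, addr, len)
	vm_map_t	map;
	vm_offset_t	addr;
	vm_size_t	len;
{
	vm_map_copy_t	copy;
	kern_return_t	kr;

	kr = vm_map_copyin_page_list(map, addr, len, FALSE, FALSE,
				     &copy, FALSE);

	while (kr == KERN_SUCCESS && copy != VM_MAP_COPY_NULL) {
		boolean_t	more = vm_map_copy_has_cont(copy);
		kern_return_t	(*cont)() = copy->cpy_cont;
		char		*cont_args = copy->cpy_cont_args;

		example_use_and_release_pages(copy);	/* hypothetical */

		if (!more)
			break;		/* whole range has been delivered */

		/*
		 * Fetch the next batch of pages.  The continuation
		 * frees its own argument block and map reference.
		 * (To abandon the operation instead, it would be
		 * invoked with a null copy_result.)
		 */
		kr = (*cont)(cont_args, &copy);
	}
	return(kr);
}
#endif
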
4489 /*
4490 * vm_map_fork:
4491 *
4492 * Create and return a new map based on the old
4493 * map, according to the inheritance values on the
4494 * regions in that map.
4495 *
4496 * The source map must not be locked.
4497 */
4498 vm_map_t vm_map_fork(old_map)
4499 vm_map_t old_map;
4500 {
4501 vm_map_t new_map;
4502 register
4503 vm_map_entry_t old_entry;
4504 register
4505 vm_map_entry_t new_entry;
4506 pmap_t new_pmap = pmap_create((vm_size_t) 0);
4507 vm_size_t new_size = 0;
4508 vm_size_t entry_size;
4509 register
4510 vm_object_t object;
4511
4512 vm_map_lock(old_map);
4513
4514 new_map = vm_map_create(new_pmap,
4515 old_map->min_offset,
4516 old_map->max_offset,
4517 old_map->hdr.entries_pageable);
4518
4519 for (
4520 old_entry = vm_map_first_entry(old_map);
4521 old_entry != vm_map_to_entry(old_map);
4522 ) {
4523 if (old_entry->is_sub_map)
4524 panic("vm_map_fork: encountered a submap");
4525
4526 entry_size = (old_entry->vme_end - old_entry->vme_start);
4527
4528 switch (old_entry->inheritance) {
4529 case VM_INHERIT_NONE:
4530 break;
4531
4532 case VM_INHERIT_SHARE:
4533 /*
4534 * New sharing code. New map entry
4535 * references original object. Temporary
4536 * objects use asynchronous copy algorithm for
4537 * future copies. First make sure we have
4538 * the right object. If we need a shadow,
4539 * or someone else already has one, then
4540 * make a new shadow and share it.
4541 */
4542
4543 object = old_entry->object.vm_object;
4544 if (object == VM_OBJECT_NULL) {
4545 object = vm_object_allocate(
4546 (vm_size_t)(old_entry->vme_end -
4547 old_entry->vme_start));
4548 old_entry->offset = 0;
4549 old_entry->object.vm_object = object;
4550 assert(!old_entry->needs_copy);
4551 }
4552 else if (old_entry->needs_copy || object->shadowed ||
4553 (object->temporary && !old_entry->is_shared &&
4554 object->size > (vm_size_t)(old_entry->vme_end -
4555 old_entry->vme_start))) {
4556
4557 assert(object->temporary);
4558 assert(!(object->shadowed && old_entry->is_shared));
4559 vm_object_shadow(
4560 &old_entry->object.vm_object,
4561 &old_entry->offset,
4562 (vm_size_t) (old_entry->vme_end -
4563 old_entry->vme_start));
4564
4565 /*
4566 * If we're making a shadow for other than
4567 * copy on write reasons, then we have
4568 * to remove write permission.
4569 */
4570
4571 if (!old_entry->needs_copy &&
4572 (old_entry->protection & VM_PROT_WRITE)) {
4573 pmap_protect(vm_map_pmap(old_map),
4574 old_entry->vme_start,
4575 old_entry->vme_end,
4576 old_entry->protection &
4577 ~VM_PROT_WRITE);
4578 }
4579 old_entry->needs_copy = FALSE;
4580 object = old_entry->object.vm_object;
4581 }
4582
4583 /*
4584 * Set use_shared_copy to indicate that
4585 			 * object must use shared (delayed) copy-on-
4586 			 * write.  This is ignored for permanent objects.
4587 			 * Bump the reference count for the new entry.
4588 */
4589
4590 vm_object_lock(object);
4591 object->use_shared_copy = TRUE;
4592 object->ref_count++;
4593 vm_object_unlock(object);
4594
4595 if (old_entry->projected_on != 0) {
4596 /*
4597 * If entry is projected buffer, clone the
4598 * entry exactly.
4599 */
4600 			  new_entry = vm_map_entry_create(new_map);
4601 vm_map_entry_copy_full(new_entry, old_entry);
4602
4603 } else {
4604 /*
4605 * Clone the entry, using object ref from above.
4606 * Mark both entries as shared.
4607 */
4608
4609 new_entry = vm_map_entry_create(new_map);
4610 vm_map_entry_copy(new_entry, old_entry);
4611 old_entry->is_shared = TRUE;
4612 new_entry->is_shared = TRUE;
4613 }
4614
4615 /*
4616 * Insert the entry into the new map -- we
4617 * know we're inserting at the end of the new
4618 * map.
4619 */
4620
4621 vm_map_entry_link(
4622 new_map,
4623 vm_map_last_entry(new_map),
4624 new_entry);
4625
4626 /*
4627 * Update the physical map
4628 */
4629
4630 pmap_copy(new_map->pmap, old_map->pmap,
4631 new_entry->vme_start,
4632 entry_size,
4633 old_entry->vme_start);
4634
4635 new_size += entry_size;
4636 break;
4637
4638 case VM_INHERIT_COPY:
4639 if (old_entry->wired_count == 0) {
4640 boolean_t src_needs_copy;
4641 boolean_t new_entry_needs_copy;
4642
4643 new_entry = vm_map_entry_create(new_map);
4644 vm_map_entry_copy(new_entry, old_entry);
4645
4646 if (vm_object_copy_temporary(
4647 &new_entry->object.vm_object,
4648 &new_entry->offset,
4649 &src_needs_copy,
4650 &new_entry_needs_copy)) {
4651
4652 /*
4653 * Handle copy-on-write obligations
4654 */
4655
4656 if (src_needs_copy && !old_entry->needs_copy) {
4657 vm_object_pmap_protect(
4658 old_entry->object.vm_object,
4659 old_entry->offset,
4660 entry_size,
4661 (old_entry->is_shared ?
4662 PMAP_NULL :
4663 old_map->pmap),
4664 old_entry->vme_start,
4665 old_entry->protection &
4666 ~VM_PROT_WRITE);
4667
4668 old_entry->needs_copy = TRUE;
4669 }
4670
4671 new_entry->needs_copy = new_entry_needs_copy;
4672
4673 /*
4674 * Insert the entry at the end
4675 * of the map.
4676 */
4677
4678 vm_map_entry_link(new_map,
4679 vm_map_last_entry(new_map),
4680 new_entry);
4681
4682
4683 new_size += entry_size;
4684 break;
4685 }
4686
4687 vm_map_entry_dispose(new_map, new_entry);
4688 }
4689
4690 /* INNER BLOCK (copy cannot be optimized) */ {
4691
4692 vm_offset_t start = old_entry->vme_start;
4693 vm_map_copy_t copy;
4694 vm_map_entry_t last = vm_map_last_entry(new_map);
4695
4696 vm_map_unlock(old_map);
4697 if (vm_map_copyin(old_map,
4698 start,
4699 entry_size,
4700 FALSE,
4701 				  &copy)
4702 != KERN_SUCCESS) {
4703 vm_map_lock(old_map);
4704 if (!vm_map_lookup_entry(old_map, start, &last))
4705 last = last->vme_next;
4706 old_entry = last;
4707 /*
4708 * For some error returns, want to
4709 * skip to the next element.
4710 */
4711
4712 continue;
4713 }
4714
4715 /*
4716 * Insert the copy into the new map
4717 */
4718
4719 vm_map_copy_insert(new_map, last, copy);
4720 new_size += entry_size;
4721
4722 /*
4723 * Pick up the traversal at the end of
4724 * the copied region.
4725 */
4726
4727 vm_map_lock(old_map);
4728 start += entry_size;
4729 if (!vm_map_lookup_entry(old_map, start, &last))
4730 last = last->vme_next;
4731 else
4732 vm_map_clip_start(old_map, last, start);
4733 old_entry = last;
4734
4735 continue;
4736 /* INNER BLOCK (copy cannot be optimized) */ }
4737 }
4738 old_entry = old_entry->vme_next;
4739 }
4740
4741 new_map->size = new_size;
4742 vm_map_unlock(old_map);
4743
4744 return(new_map);
4745 }
4746
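/*
 *	Illustrative sketch (not part of the original vm_map.c): the
 *	per-entry inheritance values that vm_map_fork switches on above are
 *	set by the parent before the child task is created.  This assumes
 *	the standard user-level vm_inherit() and task_create() interfaces
 *	and mach_task_self(); it is only a sketch of the calling pattern.
 */
#if 0	/* illustrative only */
void example_share_with_children(buf, buf_size)
	vm_offset_t	buf;
	vm_size_t	buf_size;
{
	task_t		child_task;

	/* Share this region with children instead of copying it. */
	(void) vm_inherit(mach_task_self(), buf, buf_size,
			  VM_INHERIT_SHARE);

	/* inherit_memory == TRUE makes task creation go through vm_map_fork. */
	(void) task_create(mach_task_self(), TRUE, &child_task);
}
#endif
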
4747 /*
4748 * vm_map_lookup:
4749 *
4750 * Finds the VM object, offset, and
4751 * protection for a given virtual address in the
4752 * specified map, assuming a page fault of the
4753 * type specified.
4754 *
4755 * Returns the (object, offset, protection) for
4756 	 * this address, and whether the mapping in question
4757 	 * is wired down.
4758 * In order to later verify this lookup, a "version"
4759 * is returned.
4760 *
4761 * The map should not be locked; it will not be
4762 * locked on exit. In order to guarantee the
4763 * existence of the returned object, it is returned
4764 * locked.
4765 *
4766 * If a lookup is requested with "write protection"
4767 * specified, the map may be changed to perform virtual
4768 * copying operations, although the data referenced will
4769 * remain the same.
4770 */
4771 kern_return_t vm_map_lookup(var_map, vaddr, fault_type, out_version,
4772 object, offset, out_prot, wired)
4773 vm_map_t *var_map; /* IN/OUT */
4774 register vm_offset_t vaddr;
4775 register vm_prot_t fault_type;
4776
4777 vm_map_version_t *out_version; /* OUT */
4778 vm_object_t *object; /* OUT */
4779 vm_offset_t *offset; /* OUT */
4780 vm_prot_t *out_prot; /* OUT */
4781 boolean_t *wired; /* OUT */
4782 {
4783 register vm_map_entry_t entry;
4784 register vm_map_t map = *var_map;
4785 register vm_prot_t prot;
4786
4787 RetryLookup: ;
4788
4789 /*
4790 * Lookup the faulting address.
4791 */
4792
4793 vm_map_lock_read(map);
4794
4795 #define RETURN(why) \
4796 { \
4797 vm_map_unlock_read(map); \
4798 return(why); \
4799 }
4800
4801 /*
4802 * If the map has an interesting hint, try it before calling
4803 * full blown lookup routine.
4804 */
4805
4806 simple_lock(&map->hint_lock);
4807 entry = map->hint;
4808 simple_unlock(&map->hint_lock);
4809
4810 if ((entry == vm_map_to_entry(map)) ||
4811 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
4812 vm_map_entry_t tmp_entry;
4813
4814 /*
4815 * Entry was either not a valid hint, or the vaddr
4816 * was not contained in the entry, so do a full lookup.
4817 */
4818 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
4819 RETURN(KERN_INVALID_ADDRESS);
4820
4821 entry = tmp_entry;
4822 }
4823
4824 /*
4825 * Handle submaps.
4826 */
4827
4828 if (entry->is_sub_map) {
4829 vm_map_t old_map = map;
4830
4831 *var_map = map = entry->object.sub_map;
4832 vm_map_unlock_read(old_map);
4833 goto RetryLookup;
4834 }
4835
4836 /*
4837 * Check whether this task is allowed to have
4838 * this page.
4839 */
4840
4841 prot = entry->protection;
4842 if ((fault_type & (prot)) != fault_type)
4843 RETURN(KERN_PROTECTION_FAILURE);
4844
4845 /*
4846 * If this page is not pageable, we have to get
4847 * it for all possible accesses.
4848 */
4849
4850 if (*wired = (entry->wired_count != 0))
4851 prot = fault_type = entry->protection;
4852
4853 /*
4854 	 * If the entry was copy-on-write, we either shadow it now or demote the allowed access.
4855 */
4856
4857 if (entry->needs_copy) {
4858 /*
4859 * If we want to write the page, we may as well
4860 * handle that now since we've got the map locked.
4861 *
4862 * If we don't need to write the page, we just
4863 * demote the permissions allowed.
4864 */
4865
4866 if (fault_type & VM_PROT_WRITE) {
4867 /*
4868 * Make a new object, and place it in the
4869 * object chain. Note that no new references
4870 * have appeared -- one just moved from the
4871 * map to the new object.
4872 */
4873
4874 if (vm_map_lock_read_to_write(map)) {
4875 goto RetryLookup;
4876 }
4877 map->timestamp++;
4878
4879 vm_object_shadow(
4880 &entry->object.vm_object,
4881 &entry->offset,
4882 (vm_size_t) (entry->vme_end - entry->vme_start));
4883
4884 entry->needs_copy = FALSE;
4885
4886 vm_map_lock_write_to_read(map);
4887 }
4888 else {
4889 /*
4890 * We're attempting to read a copy-on-write
4891 * page -- don't allow writes.
4892 */
4893
4894 prot &= (~VM_PROT_WRITE);
4895 }
4896 }
4897
4898 /*
4899 * Create an object if necessary.
4900 */
4901 if (entry->object.vm_object == VM_OBJECT_NULL) {
4902
4903 if (vm_map_lock_read_to_write(map)) {
4904 goto RetryLookup;
4905 }
4906
4907 entry->object.vm_object = vm_object_allocate(
4908 (vm_size_t)(entry->vme_end - entry->vme_start));
4909 entry->offset = 0;
4910 vm_map_lock_write_to_read(map);
4911 }
4912
4913 /*
4914 * Return the object/offset from this entry. If the entry
4915 * was copy-on-write or empty, it has been fixed up. Also
4916 * return the protection.
4917 */
4918
4919 *offset = (vaddr - entry->vme_start) + entry->offset;
4920 *object = entry->object.vm_object;
4921 *out_prot = prot;
4922
4923 /*
4924 * Lock the object to prevent it from disappearing
4925 */
4926
4927 vm_object_lock(*object);
4928
4929 /*
4930 * Save the version number and unlock the map.
4931 */
4932
4933 out_version->main_timestamp = map->timestamp;
4934
4935 RETURN(KERN_SUCCESS);
4936
4937 #undef RETURN
4938 }
4939
4940 /*
4941 * vm_map_verify:
4942 *
4943 * Verifies that the map in question has not changed
4944 * since the given version. If successful, the map
4945 * will not change until vm_map_verify_done() is called.
4946 */
4947 boolean_t vm_map_verify(map, version)
4948 register
4949 vm_map_t map;
4950 register
4951 vm_map_version_t *version; /* REF */
4952 {
4953 boolean_t result;
4954
4955 vm_map_lock_read(map);
4956 result = (map->timestamp == version->main_timestamp);
4957
4958 if (!result)
4959 vm_map_unlock_read(map);
4960
4961 return(result);
4962 }
4963
4964 /*
4965 * vm_map_verify_done:
4966 *
4967 * Releases locks acquired by a vm_map_verify.
4968 *
4969 * This is now a macro in vm/vm_map.h. It does a
4970 * vm_map_unlock_read on the map.
4971 */
4972
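/*
 *	Illustrative sketch (not part of the original vm_map.c): the
 *	lookup/verify protocol described above, roughly as a page-fault
 *	handler would use it -- resolve the address, drop the map lock
 *	while working on the object, then re-validate the saved version
 *	before relying on the result.  example_resolve_page() is a
 *	hypothetical stand-in for the object-level work (cf. vm_fault_page),
 *	and vm_map_verify_done is assumed to take (map, version), per the
 *	comment above.
 */
#if 0	/* illustrative only */
kern_return_t example_fault(map, vaddr, fault_type)
	vm_map_t	map;
	vm_offset_t	vaddr;
	vm_prot_t	fault_type;
{
	vm_map_version_t	version;
	vm_object_t		object;
	vm_offset_t		offset;
	vm_prot_t		prot;
	boolean_t		wired;
	kern_return_t		kr;

	kr = vm_map_lookup(&map, vaddr, fault_type, &version,
			   &object, &offset, &prot, &wired);
	if (kr != KERN_SUCCESS)
		return(kr);

	/*
	 * The map is now unlocked and the object is returned locked;
	 * the object-level work below is assumed to unlock it.
	 */
	kr = example_resolve_page(object, offset, prot);	/* hypothetical */
	if (kr != KERN_SUCCESS)
		return(kr);

	if (!vm_map_verify(map, &version)) {
		/*
		 * The map changed while it was unlocked; the caller
		 * must retry from the lookup.
		 */
		return(KERN_FAILURE);
	}

	/*
	 * vm_map_verify left the map read-locked, so the translation
	 * is still valid here (e.g. to enter it in the pmap).
	 */
	vm_map_verify_done(map, &version);
	return(KERN_SUCCESS);
}
#endif
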
4973 /*
4974 * vm_region:
4975 *
4976 * User call to obtain information about a region in
4977 * a task's address map.
4978 */
4979
4980 kern_return_t vm_region(map, address, size,
4981 protection, max_protection,
4982 inheritance, is_shared,
4983 object_name, offset_in_object)
4984 vm_map_t map;
4985 vm_offset_t *address; /* IN/OUT */
4986 vm_size_t *size; /* OUT */
4987 vm_prot_t *protection; /* OUT */
4988 vm_prot_t *max_protection; /* OUT */
4989 vm_inherit_t *inheritance; /* OUT */
4990 boolean_t *is_shared; /* OUT */
4991 ipc_port_t *object_name; /* OUT */
4992 vm_offset_t *offset_in_object; /* OUT */
4993 {
4994 vm_map_entry_t tmp_entry;
4995 register
4996 vm_map_entry_t entry;
4997 register
4998 vm_offset_t tmp_offset;
4999 vm_offset_t start;
5000
5001 if (map == VM_MAP_NULL)
5002 return(KERN_INVALID_ARGUMENT);
5003
5004 start = *address;
5005
5006 vm_map_lock_read(map);
5007 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
5008 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
5009 vm_map_unlock_read(map);
5010 return(KERN_NO_SPACE);
5011 }
5012 } else {
5013 entry = tmp_entry;
5014 }
5015
5016 start = entry->vme_start;
5017 *protection = entry->protection;
5018 *max_protection = entry->max_protection;
5019 *inheritance = entry->inheritance;
5020 *address = start;
5021 *size = (entry->vme_end - start);
5022
5023 tmp_offset = entry->offset;
5024
5025
5026 if (entry->is_sub_map) {
5027 *is_shared = FALSE;
5028 *object_name = IP_NULL;
5029 *offset_in_object = tmp_offset;
5030 } else {
5031 *is_shared = entry->is_shared;
5032 *object_name = vm_object_name(entry->object.vm_object);
5033 *offset_in_object = tmp_offset;
5034 }
5035
5036 vm_map_unlock_read(map);
5037
5038 return(KERN_SUCCESS);
5039 }
5040
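/*
 *	Illustrative sketch (not part of the original vm_map.c): the
 *	user-level counterpart of vm_region above, walking a task's address
 *	map by repeatedly asking for the region at or above an address and
 *	then skipping past it.  Assumes the MIG-generated vm_region stub
 *	mirrors the kernel signature above, and that mach_task_self() and
 *	printf are available to the caller.
 */
#if 0	/* illustrative only */
void example_print_regions()
{
	vm_offset_t	address = 0;
	vm_size_t	size;
	vm_prot_t	protection, max_protection;
	vm_inherit_t	inheritance;
	boolean_t	shared;
	mach_port_t	object_name;
	vm_offset_t	offset;

	while (vm_region(mach_task_self(), &address, &size,
			 &protection, &max_protection, &inheritance,
			 &shared, &object_name, &offset) == KERN_SUCCESS) {
		printf("0x%x..0x%x prot %x/%x %s\n",
		       address, address + size,
		       protection, max_protection,
		       shared ? "shared" : "private");
		address += size;	/* resume past this region */
	}
	/* The loop ends once the lookup runs past the last region. */
}
#endif
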
5041 /*
5042 * Routine: vm_map_simplify
5043 *
5044 * Description:
5045 * Attempt to simplify the map representation in
5046 * the vicinity of the given starting address.
5047 * Note:
5048 * This routine is intended primarily to keep the
5049 * kernel maps more compact -- they generally don't
5050 * benefit from the "expand a map entry" technology
5051 * at allocation time because the adjacent entry
5052 * is often wired down.
5053 */
5054 void vm_map_simplify(map, start)
5055 vm_map_t map;
5056 vm_offset_t start;
5057 {
5058 vm_map_entry_t this_entry;
5059 vm_map_entry_t prev_entry;
5060
5061 vm_map_lock(map);
5062 if (
5063 (vm_map_lookup_entry(map, start, &this_entry)) &&
5064 ((prev_entry = this_entry->vme_prev) != vm_map_to_entry(map)) &&
5065
5066 (prev_entry->vme_end == start) &&
5067
5068 (prev_entry->is_shared == FALSE) &&
5069 (prev_entry->is_sub_map == FALSE) &&
5070
5071 (this_entry->is_shared == FALSE) &&
5072 (this_entry->is_sub_map == FALSE) &&
5073
5074 (prev_entry->inheritance == this_entry->inheritance) &&
5075 (prev_entry->protection == this_entry->protection) &&
5076 (prev_entry->max_protection == this_entry->max_protection) &&
5077 (prev_entry->wired_count == this_entry->wired_count) &&
5078 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
5079
5080 (prev_entry->needs_copy == this_entry->needs_copy) &&
5081
5082 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
5083 ((prev_entry->offset + (prev_entry->vme_end - prev_entry->vme_start))
5084 == this_entry->offset) &&
5085 (prev_entry->projected_on == 0) &&
5086 (this_entry->projected_on == 0)
5087 ) {
5088 if (map->first_free == this_entry)
5089 map->first_free = prev_entry;
5090
5091 SAVE_HINT(map, prev_entry);
5092 vm_map_entry_unlink(map, this_entry);
5093 prev_entry->vme_end = this_entry->vme_end;
5094 vm_object_deallocate(this_entry->object.vm_object);
5095 vm_map_entry_dispose(map, this_entry);
5096 }
5097 vm_map_unlock(map);
5098 }
5099
5100
5101 /*
5102 * Routine: vm_map_machine_attribute
5103 * Purpose:
5104 * Provide machine-specific attributes to mappings,
5105 * such as cachability etc. for machines that provide
5106 * them. NUMA architectures and machines with big/strange
5107 * caches will use this.
5108 * Note:
5109 * Responsibilities for locking and checking are handled here,
5110 * everything else in the pmap module. If any non-volatile
5111 * information must be kept, the pmap module should handle
5112 * it itself. [This assumes that attributes do not
5113 * need to be inherited, which seems ok to me]
5114 */
5115 kern_return_t vm_map_machine_attribute(map, address, size, attribute, value)
5116 vm_map_t map;
5117 vm_offset_t address;
5118 vm_size_t size;
5119 vm_machine_attribute_t attribute;
5120 vm_machine_attribute_val_t* value; /* IN/OUT */
5121 {
5122 kern_return_t ret;
5123
5124 if (address < vm_map_min(map) ||
5125 (address + size) > vm_map_max(map))
5126 return KERN_INVALID_ARGUMENT;
5127
5128 vm_map_lock(map);
5129
5130 ret = pmap_attribute(map->pmap, address, size, attribute, value);
5131
5132 vm_map_unlock(map);
5133
5134 return ret;
5135 }
5136
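/*
 *	Illustrative sketch (not part of the original vm_map.c): a typical
 *	use of vm_map_machine_attribute above, flushing the cache over a
 *	buffer before handing it to non-coherent hardware.  Assumes the
 *	MATTR_CACHE attribute and MATTR_VAL_CACHE_FLUSH value from
 *	<mach/vm_attributes.h>; whether anything actually happens is up to
 *	pmap_attribute on the particular machine.
 */
#if 0	/* illustrative only */
kern_return_t example_flush_buffer(map, addr, size)
	vm_map_t	map;
	vm_offset_t	addr;
	vm_size_t	size;
{
	vm_machine_attribute_val_t	value = MATTR_VAL_CACHE_FLUSH;

	return(vm_map_machine_attribute(map, addr, size,
					MATTR_CACHE, &value));
}
#endif
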
5137 #include <mach_kdb.h>
5138
5139
5140 #if MACH_KDB
5141
5142 #define printf kdbprintf
5143
5144 /*
5145 * vm_map_print: [ debug ]
5146 */
5147 void vm_map_print(map)
5148 register vm_map_t map;
5149 {
5150 register vm_map_entry_t entry;
5151 extern int indent;
5152
5153 iprintf("Task map 0x%X: pmap=0x%X,",
5154 (vm_offset_t) map, (vm_offset_t) (map->pmap));
5155 printf("ref=%d,nentries=%d,", map->ref_count, map->hdr.nentries);
5156 printf("version=%d\n", map->timestamp);
5157 indent += 2;
5158 for (entry = vm_map_first_entry(map);
5159 entry != vm_map_to_entry(map);
5160 entry = entry->vme_next) {
5161 static char *inheritance_name[3] = { "share", "copy", "none"};
5162
5163 iprintf("map entry 0x%X: ", (vm_offset_t) entry);
5164 printf("start=0x%X, end=0x%X, ",
5165 (vm_offset_t) entry->vme_start, (vm_offset_t) entry->vme_end);
5166 printf("prot=%X/%X/%s, ",
5167 entry->protection,
5168 entry->max_protection,
5169 inheritance_name[entry->inheritance]);
5170 if (entry->wired_count != 0) {
5171 printf("wired(");
5172 if (entry->user_wired_count != 0)
5173 printf("u");
5174 if (entry->wired_count >
5175 ((entry->user_wired_count == 0) ? 0 : 1))
5176 printf("k");
5177 printf(") ");
5178 }
5179 if (entry->in_transition) {
5180 printf("in transition");
5181 if (entry->needs_wakeup)
5182 printf("(wake request)");
5183 printf(", ");
5184 }
5185 if (entry->is_sub_map) {
5186 printf("submap=0x%X, offset=0x%X\n",
5187 (vm_offset_t) entry->object.sub_map,
5188 (vm_offset_t) entry->offset);
5189 } else {
5190 printf("object=0x%X, offset=0x%X",
5191 (vm_offset_t) entry->object.vm_object,
5192 (vm_offset_t) entry->offset);
5193 if (entry->is_shared)
5194 printf(", shared");
5195 if (entry->needs_copy)
5196 printf(", copy needed");
5197 printf("\n");
5198
5199 if ((entry->vme_prev == vm_map_to_entry(map)) ||
5200 (entry->vme_prev->object.vm_object != entry->object.vm_object)) {
5201 indent += 2;
5202 vm_object_print(entry->object.vm_object);
5203 indent -= 2;
5204 }
5205 }
5206 }
5207 indent -= 2;
5208 }
5209
5210 /*
5211 * Routine: vm_map_copy_print
5212 * Purpose:
5213 * Pretty-print a copy object for ddb.
5214 */
5215
5216 void vm_map_copy_print(copy)
5217 vm_map_copy_t copy;
5218 {
5219 extern int indent;
5220 int i, npages;
5221
5222 printf("copy object 0x%x\n", copy);
5223
5224 indent += 2;
5225
5226 iprintf("type=%d", copy->type);
5227 switch (copy->type) {
5228 case VM_MAP_COPY_ENTRY_LIST:
5229 printf("[entry_list]");
5230 break;
5231
5232 case VM_MAP_COPY_OBJECT:
5233 printf("[object]");
5234 break;
5235
5236 case VM_MAP_COPY_PAGE_LIST:
5237 printf("[page_list]");
5238 break;
5239
5240 default:
5241 printf("[bad type]");
5242 break;
5243 }
5244 printf(", offset=0x%x", copy->offset);
5245 printf(", size=0x%x\n", copy->size);
5246
5247 switch (copy->type) {
5248 case VM_MAP_COPY_ENTRY_LIST:
5249 /* XXX add stuff here */
5250 break;
5251
5252 case VM_MAP_COPY_OBJECT:
5253 iprintf("object=0x%x\n", copy->cpy_object);
5254 break;
5255
5256 case VM_MAP_COPY_PAGE_LIST:
5257 iprintf("npages=%d", copy->cpy_npages);
5258 printf(", cont=%x", copy->cpy_cont);
5259 printf(", cont_args=%x\n", copy->cpy_cont_args);
5260 if (copy->cpy_npages < 0) {
5261 npages = 0;
5262 } else if (copy->cpy_npages > VM_MAP_COPY_PAGE_LIST_MAX) {
5263 npages = VM_MAP_COPY_PAGE_LIST_MAX;
5264 } else {
5265 npages = copy->cpy_npages;
5266 }
5267 iprintf("copy->cpy_page_list[0..%d] = {", npages);
5268 for (i = 0; i < npages - 1; i++) {
5269 printf("0x%x, ", copy->cpy_page_list[i]);
5270 }
5271 if (npages > 0) {
5272 printf("0x%x", copy->cpy_page_list[npages - 1]);
5273 }
5274 printf("}\n");
5275 break;
5276 }
5277
5278 	indent -= 2;
5279 }
5280 #endif	/* MACH_KDB */
5281
5282 #if NORMA_IPC
5283 /*
5284 * This should one day be eliminated;
5285 * we should always construct the right flavor of copy object
5286 * the first time. Troublesome areas include vm_read, where vm_map_copyin
5287 * is called without knowing whom the copy object is for.
5288 * There are also situations where we do want a lazy data structure
5289 * even if we are sending to a remote port...
5290 */
5291
5292 /*
5293 * Convert a copy to a page list. The copy argument is in/out
5294 * because we probably have to allocate a new vm_map_copy structure.
5295 * We take responsibility for discarding the old structure and
5296 * use a continuation to do so. Postponing this discard ensures
5297 * that the objects containing the pages we've marked busy will stick
5298 * around.
5299 */
5300 kern_return_t
5301 vm_map_convert_to_page_list(caller_copy)
5302 vm_map_copy_t *caller_copy;
5303 {
5304 vm_map_entry_t entry, next_entry;
5305 vm_offset_t va;
5306 vm_offset_t offset;
5307 vm_object_t object;
5308 kern_return_t result;
5309 vm_map_copy_t copy, new_copy;
5310 int i, num_pages = 0;
5311
5312 zone_t entry_zone;
5313
5314 copy = *caller_copy;
5315
5316 /*
5317 * We may not have to do anything,
5318 * or may not be able to do anything.
5319 */
5320 if (copy == VM_MAP_COPY_NULL || copy->type == VM_MAP_COPY_PAGE_LIST) {
5321 return KERN_SUCCESS;
5322 }
5323 if (copy->type == VM_MAP_COPY_OBJECT) {
5324 return vm_map_convert_to_page_list_from_object(caller_copy);
5325 }
5326 if (copy->type != VM_MAP_COPY_ENTRY_LIST) {
5327 panic("vm_map_convert_to_page_list: copy type %d!\n",
5328 copy->type);
5329 }
5330
5331 /*
5332 * Allocate the new copy. Set its continuation to
5333 * discard the old one.
5334 */
5335 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5336 new_copy->type = VM_MAP_COPY_PAGE_LIST;
5337 new_copy->cpy_npages = 0;
5338 new_copy->offset = copy->offset;
5339 new_copy->size = copy->size;
5340 new_copy->cpy_cont = vm_map_copy_discard_cont;
5341 new_copy->cpy_cont_args = (char *) copy;
5342
5343 /*
5344 * Iterate over entries.
5345 */
5346 for (entry = vm_map_copy_first_entry(copy);
5347 entry != vm_map_copy_to_entry(copy);
5348 entry = entry->vme_next) {
5349
5350 object = entry->object.vm_object;
5351 offset = entry->offset;
5352 /*
5353 * Iterate over pages.
5354 */
5355 for (va = entry->vme_start;
5356 va < entry->vme_end;
5357 va += PAGE_SIZE, offset += PAGE_SIZE) {
5358
5359 vm_page_t m;
5360
5361 if (new_copy->cpy_npages == VM_MAP_COPY_PAGE_LIST_MAX) {
5362 /*
5363 * What a mess. We need a continuation
5364 * to do the page list, but also one
5365 * to discard the old copy. The right
5366 * thing to do is probably to copy
5367 * out the old copy into the kernel
5368 * map (or some temporary task holding
5369 * map if we're paranoid about large
5370 * copies), and then copyin the page
5371 * list that we really wanted with
5372 * src_destroy. LATER.
5373 */
5374 panic("vm_map_convert_to_page_list: num\n");
5375 }
5376
5377 /*
5378 * Try to find the page of data.
5379 */
5380 vm_object_lock(object);
5381 vm_object_paging_begin(object);
5382 if (((m = vm_page_lookup(object, offset)) !=
5383 VM_PAGE_NULL) && !m->busy && !m->fictitious &&
5384 !m->absent && !m->error) {
5385
5386 /*
5387 * This is the page. Mark it busy
5388 * and keep the paging reference on
5389 * the object whilst we do our thing.
5390 */
5391 m->busy = TRUE;
5392 }
5393 else {
5394 vm_prot_t result_prot;
5395 vm_page_t top_page;
5396 kern_return_t kr;
5397
5398 retry:
5399 result_prot = VM_PROT_READ;
5400
5401 kr = vm_fault_page(object, offset,
5402 VM_PROT_READ, FALSE, FALSE,
5403 &result_prot, &m, &top_page,
5404 FALSE, (void (*)()) 0);
5405 if (kr == VM_FAULT_MEMORY_SHORTAGE) {
5406 VM_PAGE_WAIT((void (*)()) 0);
5407 vm_object_lock(object);
5408 vm_object_paging_begin(object);
5409 goto retry;
5410 }
5411 if (kr != VM_FAULT_SUCCESS) {
5412 /* XXX what about data_error? */
5413 vm_object_lock(object);
5414 vm_object_paging_begin(object);
5415 goto retry;
5416 }
5417 if (top_page != VM_PAGE_NULL) {
5418 vm_object_lock(object);
5419 VM_PAGE_FREE(top_page);
5420 vm_object_paging_end(object);
5421 vm_object_unlock(object);
5422 }
5423 }
5424 assert(m);
5425 m->busy = TRUE;
5426 new_copy->cpy_page_list[new_copy->cpy_npages++] = m;
5427 vm_object_unlock(object);
5428 }
5429 }
5430
5431 *caller_copy = new_copy;
5432 return KERN_SUCCESS;
5433 }
5434
5435 kern_return_t
5436 vm_map_convert_to_page_list_from_object(caller_copy)
5437 vm_map_copy_t *caller_copy;
5438 {
5439 vm_object_t object;
5440 vm_offset_t offset;
5441 vm_map_copy_t copy, new_copy;
5442
5443 copy = *caller_copy;
5444 assert(copy->type == VM_MAP_COPY_OBJECT);
5445 object = copy->cpy_object;
5446 assert(object->size == round_page(object->size));
5447
5448 /*
5449 * Allocate the new copy. Set its continuation to
5450 * discard the old one.
5451 */
5452 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5453 new_copy->type = VM_MAP_COPY_PAGE_LIST;
5454 new_copy->cpy_npages = 0;
5455 new_copy->offset = copy->offset;
5456 new_copy->size = copy->size;
5457 new_copy->cpy_cont = vm_map_copy_discard_cont;
5458 new_copy->cpy_cont_args = (char *) copy;
5459
5460 /*
5461 * XXX memory_object_lock_request can probably bust this
5462 * XXX See continuation comment in previous routine for solution.
5463 */
5464 assert(object->size <= VM_MAP_COPY_PAGE_LIST_MAX * PAGE_SIZE);
5465
5466 for (offset = 0; offset < object->size; offset += PAGE_SIZE) {
5467 vm_page_t m;
5468
5469 /*
5470 * Try to find the page of data.
5471 */
5472 vm_object_lock(object);
5473 vm_object_paging_begin(object);
5474 m = vm_page_lookup(object, offset);
5475 if ((m != VM_PAGE_NULL) && !m->busy && !m->fictitious &&
5476 !m->absent && !m->error) {
5477
5478 /*
5479 * This is the page. Mark it busy
5480 * and keep the paging reference on
5481 * the object whilst we do our thing.
5482 */
5483 m->busy = TRUE;
5484 }
5485 else {
5486 vm_prot_t result_prot;
5487 vm_page_t top_page;
5488 kern_return_t kr;
5489
5490 retry:
5491 result_prot = VM_PROT_READ;
5492
5493 kr = vm_fault_page(object, offset,
5494 VM_PROT_READ, FALSE, FALSE,
5495 &result_prot, &m, &top_page,
5496 FALSE, (void (*)()) 0);
5497 if (kr == VM_FAULT_MEMORY_SHORTAGE) {
5498 VM_PAGE_WAIT((void (*)()) 0);
5499 vm_object_lock(object);
5500 vm_object_paging_begin(object);
5501 goto retry;
5502 }
5503 if (kr != VM_FAULT_SUCCESS) {
5504 /* XXX what about data_error? */
5505 vm_object_lock(object);
5506 vm_object_paging_begin(object);
5507 goto retry;
5508 }
5509
5510 if (top_page != VM_PAGE_NULL) {
5511 vm_object_lock(object);
5512 VM_PAGE_FREE(top_page);
5513 vm_object_paging_end(object);
5514 vm_object_unlock(object);
5515 }
5516 }
5517 assert(m);
5518 m->busy = TRUE;
5519 new_copy->cpy_page_list[new_copy->cpy_npages++] = m;
5520 vm_object_unlock(object);
5521 }
5522
5523 *caller_copy = new_copy;
5524 return (KERN_SUCCESS);
5525 }
5526
5527 kern_return_t
5528 vm_map_convert_from_page_list(copy)
5529 vm_map_copy_t copy;
5530 {
5531 vm_object_t object;
5532 int i;
5533 vm_map_entry_t new_entry;
5534 vm_page_t *page_list;
5535
5536 /*
5537 * Check type of copy object.
5538 */
5539 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5540 return KERN_SUCCESS;
5541 }
5542 if (copy->type == VM_MAP_COPY_OBJECT) {
5543 printf("vm_map_convert_from_page_list: COPY_OBJECT?");
5544 return KERN_SUCCESS;
5545 }
5546 if (copy->type != VM_MAP_COPY_PAGE_LIST) {
5547 panic("vm_map_convert_from_page_list 0x%x %d",
5548 copy,
5549 copy->type);
5550 }
5551
5552 /*
5553 * Make sure the pages are loose. This may be
5554 * a "Can't Happen", but just to be safe ...
5555 */
5556 	page_list = &copy->cpy_page_list[0];
5557 if ((*page_list)->tabled)
5558 vm_map_copy_steal_pages(copy);
5559
5560 /*
5561 * Create object, and stuff pages into it.
5562 */
5563 object = vm_object_allocate(copy->cpy_npages);
5564 for (i = 0; i < copy->cpy_npages; i++) {
5565 register vm_page_t m = *page_list++;
5566 vm_page_insert(m, object, i * PAGE_SIZE);
5567 m->busy = FALSE;
5568 m->dirty = TRUE;
5569 vm_page_activate(m);
5570 }
5571
5572 /*
5573 * XXX If this page list contained a continuation, then
5574 * XXX we're screwed. The right thing to do is probably do
5575 * XXX the copyout, and then copyin the entry list we really
5576 * XXX wanted.
5577 */
5578 if (vm_map_copy_has_cont(copy))
5579 panic("convert_from_page_list: continuation");
5580
5581 /*
5582 * Change type of copy object
5583 */
5584 vm_map_copy_first_entry(copy) =
5585 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
5586 copy->type = VM_MAP_COPY_ENTRY_LIST;
5587 copy->cpy_hdr.nentries = 0;
5588 copy->cpy_hdr.entries_pageable = TRUE;
5589
5590 /*
5591 * Allocate and initialize an entry for object
5592 */
5593 new_entry = vm_map_copy_entry_create(copy);
5594 new_entry->vme_start = trunc_page(copy->offset);
5595 new_entry->vme_end = round_page(copy->offset + copy->size);
5596 new_entry->object.vm_object = object;
5597 new_entry->offset = 0;
5598 new_entry->is_shared = FALSE;
5599 new_entry->is_sub_map = FALSE;
5600 new_entry->needs_copy = FALSE;
5601 new_entry->protection = VM_PROT_DEFAULT;
5602 new_entry->max_protection = VM_PROT_ALL;
5603 new_entry->inheritance = VM_INHERIT_DEFAULT;
5604 new_entry->wired_count = 0;
5605 new_entry->user_wired_count = 0;
5606 new_entry->projected_on = 0;
5607
5608 /*
5609 * Insert entry into copy object, and return.
5610 */
5611 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), new_entry);
5612 return(KERN_SUCCESS);
5613 }
5614 #endif	/* NORMA_IPC */