FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_map.c
1 /*
2 * Mach Operating System
3 * Copyright (c) 1993-1987 Carnegie Mellon University
4 * All Rights Reserved.
5 *
6 * Permission to use, copy, modify and distribute this software and its
7 * documentation is hereby granted, provided that both the copyright
8 * notice and this permission notice appear in all copies of the
9 * software, derivative works or modified versions, and any portions
10 * thereof, and that both notices appear in supporting documentation.
11 *
12 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
13 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
14 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
15 *
16 * Carnegie Mellon requests users of this software to return to
17 *
18 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
19 * School of Computer Science
20 * Carnegie Mellon University
21 * Pittsburgh PA 15213-3890
22 *
23 * any improvements or extensions that they make and grant Carnegie Mellon
24 * the rights to redistribute these changes.
25 */
26 /*
27 * HISTORY
28 * $Log: vm_map.c,v $
29 * Revision 2.46 93/12/23 10:07:18 dbg
30 * Fixed vm_map_copyin_page_list to write-protect pages that it
31 * finds. This prevents the caller from modifying the page while
32 * the IO device is writing it.
33 *
34 * { The correct code disappeared when vm_map_copyin_page_list went
35 * in... }
36 *
37 * Also fixed the src_destroy case of vm_map_copyin to check for
38 * a shared-writable object (object->use_shared_copy) and do
39 * copy-on-write processing if so (a missing part of the share_map
40 * purge).
41 * [93/12/14 dbg]
42 *
43 * Revision 2.45 93/11/17 18:54:55 dbg
44 * Conditionalized projected buffer support under NET_ATM.
45 * [93/09/10 dbg]
46 * ANSI-fied.
47 * [93/06/16 dbg]
48 *
49 * Revision 2.44 93/08/10 15:12:59 mrt
50 * Included support for projected buffers. The projected_on field of new
51 * vm map entries is defaulted to null. projected_buffer_collect is called
52 * from vm_map_deallocate to garbage collect projected buffers when a task
53 * terminates. Projected buffer entries are cloned exactly on map inheritance.
54 * The user is denied direct deallocation of non-persistent projected buffers
55 * (these need to be deallocated by projected_buffer_deallocate, presumably
56 * by the device driver that created it).
57 * [93/02/16 09:37:28 jcb]
58 *
59 * Revision 2.43 93/01/14 18:01:12 danner
60 * 64bit cleanup.
61 * [92/12/01 af]
62 *
63 * Revision 2.42 92/08/03 18:00:59 jfriedl
64 * removed silly prototypes
65 * [92/08/02 jfriedl]
66 *
67 * Revision 2.41 92/05/23 12:08:19 jfriedl
68 * Removed unused variables. Some cleanup to quiet gcc warnings.
69 * [92/05/16 jfriedl]
70 *
71 * Revision 2.40 92/04/06 01:15:10 rpd
72 * Fixed vm_map_copyout_page_list to mark the pages dirty. From dlb.
73 * [92/04/05 rpd]
74 *
75 * Revision 2.39 92/04/01 19:36:48 rpd
76 * Removed pmap_remove_attributes.
77 * [92/03/25 rpd]
78 * Always need vm_map_copy_discard_cont (not just NORMA_IPC).
79 * Add continuation recognition optimization to vm_map_copy_discard
80 * to avoid tail recursion when vm_map_copy_discard_cont is used.
81 * [92/03/20 14:14:00 dlb]
82 *
83 * Move inheritance arg check out of vm_map_inherit.
84 * Its callers must do this.
85 * [92/02/25 16:59:30 dlb]
86 *
87 * Revision 2.38 92/03/03 00:44:56 rpd
88 * Another wiring fix for vm_map_copyout_page_list. From dlb.
89 * [92/03/02 rpd]
90 *
91 * Revision 2.37 92/03/01 15:15:20 rpd
92 * Fixed must_wire code in vm_map_copyout_page_list.
93 * Fixed vm_map_fork/VM_MAP_INHERIT_SHARE. From dlb.
94 * [92/03/01 rpd]
95 *
96 * Revision 2.36 92/02/23 19:51:30 elf
97 * Remove all keep_wired logic. wiring_required logic in
98 * vm_map_copyin_page_list is the replacement.
99 * [92/02/21 10:15:26 dlb]
100 *
101 * Change wiring_allowed to wiring_required. Pay attention to
102 * it in vm_map_copyout_page list. This is only for the default
103 * pager -- to make it fully general, vm_map_copyout has to
104 * be modified as well - see XXX comment at bottom of routine.
105 * [92/02/20 15:18:13 dlb]
106 *
107 * Use object->shadowed and object->use_shared_copy to
108 * detect sharing instead of the reference count.
109 * [92/02/19 17:39:10 dlb]
110 *
111 * Use is_shared field in map entries to detect sharing.
112 * Some minor bug fixes to the code that eliminates
113 * sharing maps.
114 * [92/02/19 14:25:30 dlb]
115 *
116 * Use object->use_shared_copy instead of new copy strategy.
117 * Removed all sharing map logic. Rewrote comments to reflect this.
118 * vm_map_verify_done is now a macro in vm_map.h as a result.
119 *
120 * First cut (commented out) at vm_map_copy_overwrite optimization
121 * to insert entries from copy into target, still needs work.
122 *
123 * Removed (bogus) single_use argument from vm_map_lookup().
124 *
125 * Replace share map logic in vm_map_fork with asynchronous
126 * copy-on-write. Check for null object in vm_map_entry_delete.
127 * [92/01/06 16:22:17 dlb]
128 *
129 * Fixed offset bug in vm_map_copyout_page_list.
130 * Fixed format errors in vm_map_copy_print.
131 * [92/01/09 15:32:44 jsb]
132 *
133 * Added vm_map_copy_print.
134 * [92/01/08 10:10:53 jsb]
135 *
136 * Revision 2.35 92/02/19 15:45:33 elf
137 * Picked up dlb fix for vm_map_copyin_page_list,
138 * to make the continuation args correctly.
139 * [92/02/19 rpd]
140 *
141 * Revision 2.34 92/02/19 15:09:57 elf
142 * Picked up dlb fix for vm_map_copyin_page_list,
143 * for when vm_fault_page returns memory-error.
144 * [92/02/14 rpd]
145 *
146 * Revision 2.33 92/01/03 20:34:57 dbg
147 * Fix erroneous boundary condition in search for free space in
148 * vm_map_copyout(), vm_map_copyout_page_list(). The 'end' of the
149 * entry is the last used address + 1, and therefore can be <= the
150 * start of the next entry. See vm_map_enter(), where it was done
151 * correctly.
152 * [91/12/23 dbg]
153 *
154 * Add vm_map_copy_copy. It is needed by kernel interface
155 * routines that may still return errors after copying a
156 * copy-object into kernel address space.
157 * [91/12/18 dbg]
158 *
159 * Revision 2.32 91/12/13 13:49:41 jsb
160 * Removed NORMA_ETHER always_steal hack.
161 *
162 * Revision 2.31 91/12/09 19:23:39 rpd
163 * Fixed vm_map_copyout_page_list a la vm_object_coalesce.
164 * Fixed vm_map_copy_overwrite to check for misalignment.
165 * Fixed some log infelicities.
166 * [91/12/09 rpd]
167 *
168 * Revision 2.30 91/12/10 13:26:34 jsb
169 * Simplify page list continuation abort logic.
170 * [91/12/10 12:56:06 jsb]
171 *
172 * Rewrite bogus (abort || src_destroy_only) logic in
173 * vm_map_copyin_page_list_cont.
174 * Change vm_map_convert_to_page_list routines to avoid
175 * leaking object references. Includes a new version of
176 * vm_map_copy_discard for use as a page list continuation.
177 * Hold a map reference in vm_map_copyin_page_list continuations.
178 * Add some checks in vm_map_convert_from_page_list.
179 * [91/12/04 dlb]
180 *
181 * Revision 2.29 91/11/14 17:08:56 rpd
182 * Add ifdef's to always steal code. Not needed (or wanted) except
183 * to workaround a norma ether bug. UGH!
184 * [91/11/14 jeffreyh]
185 *
186 * Add consume on success logic to vm_map_copyout_page_list.
187 * [91/11/12 dlb]
188 *
189 * Revision 2.28 91/11/14 16:56:51 rpd
190 * Made vm_map_convert_*_page_list_* routines look more like
191 * David's vm_map_copy{in,out}_page_list code.
192 * [91/11/00 jsb]
193 *
194 * Revision 2.27 91/10/09 16:20:05 af
195 * Fixed vm_map_copy_page_discard to lock before activating.
196 * Fixed vm_map_copyout_page_list to clear just the busy bit (from dlb).
197 * Fixed vm_map_copy_steal_pages to activate if necessary (from dlb).
198 * [91/10/07 rpd]
199 *
200 * Fixed vm_map_copyout_page_list to clear busy and dirty bits.
201 * [91/10/06 rpd]
202 *
203 * Picked up dlb fix for stealing wired pages in vm_map_copyin_page_list.
204 * [91/09/27 rpd]
205 *
206 * Revision 2.26 91/08/28 11:18:10 jsb
207 * Changed vm_map_copyout to discard the copy object only upon success.
208 * [91/08/03 rpd]
209 * Initialize copy->cpy_cont and copy->cpy_cont args in
210 * vm_map_convert_to_page_list and
211 * vm_map_convert_to_page_list_from_object.
212 * [91/08/16 10:34:53 jsb]
213 *
214 * Optimize stealing wired pages case of vm_map_copyin_page_list.
215 * [91/08/12 17:36:57 dlb]
216 *
217 * Move vm_map_copy_steal pages in this file. Improve comments,
218 * and related cleanup.
219 * [91/08/06 17:22:43 dlb]
220 *
221 * Split page release logic for page lists into separate
222 * routine, vm_map_copy_page_discard. Minor continuation
223 * bug fix.
224 * [91/08/05 17:48:23 dlb]
225 *
226 * Move logic that steals pages by making a new copy into a separate
227 * routine since both vm_map_{copyin,copyout}_page_list may need it.
228 * Also: Previous merge included logic to be a little more careful
229 * about what gets copied when a map entry is duplicated.
230 * [91/07/31 15:15:19 dlb]
231 *
232 * Implement vm_map_copy continuation for page lists.
233 * Implement in transition map entries needed by the above.
234 * [91/07/30 14:16:40 dlb]
235 *
236 * New and improved version of vm_map_copyin_page_list:
237 * Clean up error handling (especially vm_fault_page returns).
238 * Avoid holding map locks across faults and page copies.
239 * Move page stealing code to separate loop to deal with
240 * pagein errors from vm_fault_page.
241 * Add support for stealing wired pages (allows page stealing on
242 * pagein from default pager).
243 * [91/07/03 14:15:39 dlb]
244 * Restored definition of vm_map_convert_from_page_list.
245 * Added definition of vm_map_convert_to_page_list_from_object.
246 * Added call to vm_map_convert_from_page_list to vm_map_copy_overwrite.
247 * Added include of kern/assert.h.
248 * [91/08/15 13:20:13 jsb]
249 *
250 * Revision 2.25 91/08/03 18:19:58 jsb
251 * Removed vm_map_convert_from_page_list.
252 * Temporarily make vm_map_copyin_page_list always steal pages.
253 * [91/08/01 22:49:17 jsb]
254 *
255 * NORMA_IPC: Added vm_map_convert_{to,from}_page_list functions.
256 * These will be removed when all kernel interfaces
257 * understand page_list copy objects.
258 * [91/07/04 14:00:24 jsb]
259 *
260 * Removed obsoleted NORMA_IPC functions:
261 * ipc_clport_copyin_object
262 * vm_map_copyout_page
263 * ipc_clport_copyin_pagelist
264 * [91/07/04 13:20:09 jsb]
265 *
266 * Revision 2.24 91/07/01 08:27:22 jsb
267 * 20-Jun-91 David L. Black (dlb) at Open Software Foundation
268 * Add support for page list format map copies. NORMA/CLPORT code
269 * will be cut over later.
270 *
271 * 18-Jun-91 David L. Black (dlb) at Open Software Foundation
272 * Convert to use multiple format map copies.
273 * [91/06/29 16:37:03 jsb]
274 *
275 * Revision 2.23 91/06/25 10:33:33 rpd
276 * Changed mach_port_t to ipc_port_t where appropriate.
277 * [91/05/28 rpd]
278 *
279 * Revision 2.22 91/06/17 15:49:02 jsb
280 * Renamed NORMA conditionals.
281 * [91/06/17 11:11:13 jsb]
282 *
283 * Revision 2.21 91/06/06 17:08:22 jsb
284 * NORMA_IPC support (first cut):
285 * Work with page lists instead of copy objects.
286 * Make coalescing more useful.
287 * [91/05/14 09:34:41 jsb]
288 *
289 * Revision 2.20 91/05/18 14:40:53 rpd
290 * Restored mask argument to vm_map_find_entry.
291 * [91/05/02 rpd]
292 * Removed ZALLOC and ZFREE.
293 * [91/03/31 rpd]
294 *
295 * Revised vm_map_find_entry to allow coalescing of entries.
296 * [91/03/28 rpd]
297 * Removed map_data. Moved kentry_data here.
298 * [91/03/22 rpd]
299 *
300 * Revision 2.19 91/05/14 17:49:38 mrt
301 * Correcting copyright
302 *
303 * Revision 2.18 91/03/16 15:05:42 rpd
304 * Fixed vm_map_pmap_enter to activate loose pages after PMAP_ENTER.
305 * [91/03/11 rpd]
306 * Removed vm_map_find.
307 * [91/03/03 rpd]
308 * Fixed vm_map_entry_delete's use of vm_object_page_remove,
309 * following dlb's report.
310 * [91/01/26 rpd]
311 * Picked up dlb's fix for vm_map_fork/VM_INHERIT_COPY of wired entries.
312 * [91/01/12 rpd]
313 *
314 * Revision 2.17 91/02/05 17:58:43 mrt
315 * Changed to new Mach copyright
316 * [91/02/01 16:32:45 mrt]
317 *
318 * Revision 2.16 91/01/08 16:45:08 rpd
319 * Added continuation argument to thread_block.
320 * [90/12/08 rpd]
321 *
322 * Revision 2.15 90/11/05 14:34:26 rpd
323 * Removed vm_region_old_behavior.
324 * [90/11/02 rpd]
325 *
326 * Revision 2.14 90/10/25 14:50:18 rwd
327 * Fixed bug in vm_map_enter that was introduced in 2.13.
328 * [90/10/21 rpd]
329 *
330 * Revision 2.13 90/10/12 13:05:48 rpd
331 * Removed copy_on_write field.
332 * [90/10/08 rpd]
333 *
334 * Revision 2.12 90/08/06 15:08:31 rwd
335 * Fixed several bugs in the overwriting-permanent-memory case of
336 * vm_map_copy_overwrite, including an object reference leak.
337 * [90/07/26 rpd]
338 *
339 * Revision 2.11 90/06/19 23:02:09 rpd
340 * Picked up vm_submap_object, vm_map_fork share-map revisions,
341 * including Bohman's bug fix.
342 * [90/06/08 rpd]
343 *
344 * Fixed vm_region so that it doesn't treat sub-map entries (only
345 * found in the kernel map) as regular entries. Instead, it just
346 * ignores them and doesn't try to send back an object_name reference.
347 * [90/03/23 gk]
348 *
349 * Revision 2.10 90/06/02 15:10:57 rpd
350 * Moved vm_mapped_pages_info to vm/vm_debug.c.
351 * [90/05/31 rpd]
352 *
353 * In vm_map_copyin, if length is zero allow any source address.
354 * [90/04/23 rpd]
355 *
356 * Correct share/sub map confusion in vm_map_copy_overwrite.
357 * [90/04/22 rpd]
358 *
359 * In vm_map_copyout, make the current protection be VM_PROT_DEFAULT
360 * and the inheritance be VM_INHERIT_DEFAULT.
361 * [90/04/18 rpd]
362 *
363 * Removed some extraneous code from vm_map_copyin/vm_map_copyout.
364 * [90/03/28 rpd]
365 * Updated to new vm_map_pageable, with user_wired_count.
366 * Several bug fixes for vm_map_copy_overwrite.
367 * Added vm_map_copyin_object.
368 * [90/03/26 23:14:56 rpd]
369 *
370 * Revision 2.9 90/05/29 18:38:46 rwd
371 * Add flag to turn off forced pmap_enters in vm_map call.
372 * [90/05/12 rwd]
373 * Bug fix from rpd for OOL data to VM_PROT_DEFAULT. New
374 * vm_map_pmap_enter from rfr to preemtively enter pages on vm_map
375 * calls.
376 * [90/04/20 rwd]
377 *
378 * Revision 2.8 90/05/03 15:52:42 dbg
379 * Fix vm_map_copyout to set current protection of new entries to
380 * VM_PROT_DEFAULT, to match vm_allocate.
381 * [90/04/12 dbg]
382 *
383 * Add vm_mapped_pages_info under switch MACH_DEBUG.
384 * [90/04/06 dbg]
385 *
386 * Revision 2.7 90/02/22 20:05:52 dbg
387 * Combine fields in vm_map and vm_map_copy into a vm_map_header
388 * structure. Fix macros dealing with vm_map_t and vm_map_copy_t
389 * to operate on the header, so that most of the code they use can
390 * move back into the associated functions (to reduce space).
391 * [90/01/29 dbg]
392 *
393 * Add missing code to copy map entries from pageable to
394 * non-pageable zone in vm_map_copyout. Try to avoid
395 * vm_object_copy in vm_map_copyin if source will be
396 * destroyed. Fix vm_map_copy_overwrite to correctly
397 * check for gaps in destination when destination is
398 * temporary.
399 * [90/01/26 dbg]
400 *
401 * Add keep_wired parameter to vm_map_copyin.
402 * Remove vm_map_check_protection and vm_map_insert (not used).
403 * Rewrite vm_map_find to call vm_map_enter - should fix all
404 * callers instead.
405 * [90/01/25 dbg]
406 *
407 * Add changes from mainline:
408 *
409 * Fixed syntax errors in vm_map_print.
410 * Fixed use of vm_object_copy_slowly in vm_map_copyin.
411 * Restored similar fix to vm_map_copy_entry.
412 * [89/12/01 13:56:30 rpd]
413 * Make sure object lock is held before calling
414 * vm_object_copy_slowly. Release old destination object in wired
415 * case of vm_map_copy_entry. Fixes from rpd.
416 * [89/12/15 dlb]
417 *
418 * Modify vm_map_pageable to create new objects BEFORE clipping
419 * map entries to avoid object proliferation.
420 * [88/11/30 dlb]
421 *
422 * Check for holes when wiring memory in vm_map_pageable.
423 * Pass requested access type to vm_map_pageable and check it.
424 * [88/11/21 dlb]
425 *
426 * Handle overwriting permanent objects in vm_map_copy_overwrite().
427 *
428 * Put optimized copy path in vm_map_fork().
429 * [89/10/01 23:24:32 mwyoung]
430 *
431 * Integrate the "wait for space" option for kernel maps
432 * into this module.
433 *
434 * Add vm_map_copyin(), vm_map_copyout(), vm_map_copy_discard() to
435 * perform map copies.
436 *
437 * Convert vm_map_entry_create(), vm_map_clip_{start,end} so that
438 * they may be used with either a vm_map_t or a vm_map_copy_t.
439 *
440 * Use vme_next, vme_prev, vme_start, vme_end, vm_map_to_entry.
441 * [89/08/31 21:12:23 rpd]
442 *
443 * Picked up NeXT change to vm_region: now if you give it an
444 * address in the middle of an entry, it will use the start of
445 * the entry.
446 * [89/08/20 23:19:39 rpd]
447 *
448 * A bug fix from NeXT: vm_map_protect wasn't unlocking in the
449 * is_sub_map case. Also, fixed vm_map_copy_entry to not take
450 * the address of needs_copy, because it is a bit-field now.
451 * [89/08/19 23:43:55 rpd]
452 *
453 * Revision 2.6 90/01/22 23:09:20 af
454 * Added vm_map_machine_attributes().
455 * [90/01/20 17:27:12 af]
456 *
457 * Revision 2.5 90/01/19 14:36:05 rwd
458 * Enter wired pages in destination pmap in vm_move_entry_range, to
459 * correctly implement wiring semantics.
460 * [90/01/16 dbg]
461 *
462 * Revision 2.4 89/11/29 14:18:19 af
463 * Redefine VM_PROT_DEFAULT locally for mips.
464 *
465 * Revision 2.3 89/09/08 11:28:29 dbg
466 * Add hack to avoid deadlocking while wiring kernel memory.
467 * [89/08/31 dbg]
468 *
469 * Merged with [UNDOCUMENTED!] changes from rfr.
470 * [89/08/15 dbg]
471 *
472 * Clip source map entry in vm_move_entry_range, per RFR. Marking
473 * the entire data section copy-on-write is costing more than the
474 * clips (or not being able to collapse the object) ever would.
475 * [89/07/24 dbg]
476 *
477 * Add keep_wired parameter to vm_map_move, to wire destination if
478 * source is wired.
479 * [89/07/14 dbg]
480 *
481 * Revision 2.2 89/08/11 17:57:01 rwd
482 * Changes for MACH_KERNEL:
483 * . Break out the inner loop of vm_map_enter, so that
484 * kmem_alloc can use it.
485 * . Add vm_map_move as special case of vm_allocate/vm_map_copy.
486 * [89/04/28 dbg]
487 *
488 * Revision 2.11 89/04/18 21:25:58 mwyoung
489 * Recent history [mwyoung]:
490 * Add vm_map_simplify() to keep kernel maps more compact.
491 * Condensed history:
492 * Add vm_map_enter(). [mwyoung]
493 * Return a "version" from vm_map_lookup() to simplify
494 * locking. [mwyoung]
495 * Get pageability changes right. [dbg, dlb]
496 * Original implementation. [avie, mwyoung, dbg]
497 *
498 */
499 /*
500 * File: vm/vm_map.c
501 * Author: Avadis Tevanian, Jr., Michael Wayne Young
502 * Date: 1985
503 *
504 * Virtual memory mapping module.
505 */
506
507 #include <net_atm.h>
508 #include <norma_ipc.h>
509
510 #include <mach/kern_return.h>
511 #include <mach/port.h>
512 #include <mach/vm_attributes.h>
513 #include <mach/vm_param.h>
514 #include <kern/assert.h>
515 #include <kern/kern_io.h>
516 #include <kern/zalloc.h>
517 #include <vm/vm_fault.h>
518 #include <vm/vm_map.h>
519 #include <vm/vm_object.h>
520 #include <vm/vm_page.h>
521 #include <ipc/ipc_port.h>
522
523 #if NET_ATM
524 #include <vm/vm_kern.h>
525 #endif /* NET_ATM */
526
527 /*
528 * Macros to copy a vm_map_entry. We must be careful to correctly
529 * manage the wired page count. vm_map_entry_copy() creates a new
530 * map entry to the same memory - the wired count in the new entry
531 * must be set to zero. vm_map_entry_copy_full() creates a new
532 * entry that is identical to the old entry. This preserves the
533 * wire count; it's used for map splitting and zone changing in
534 * vm_map_copyout.
535 */
536 #define vm_map_entry_copy(NEW,OLD) \
537 MACRO_BEGIN \
538 *(NEW) = *(OLD); \
539 (NEW)->is_shared = FALSE; \
540 (NEW)->needs_wakeup = FALSE; \
541 (NEW)->in_transition = FALSE; \
542 (NEW)->wired_count = 0; \
543 (NEW)->user_wired_count = 0; \
544 MACRO_END
545
546 #define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD))
547
548 /*
549 * Virtual memory maps provide for the mapping, protection,
550 * and sharing of virtual memory objects. In addition,
551 * this module provides for an efficient virtual copy of
552 * memory from one map to another.
553 *
554 * Synchronization is required prior to most operations.
555 *
556 * Maps consist of an ordered doubly-linked list of simple
557 * entries; a single hint is used to speed up lookups.
558 *
559 * Sharing maps have been deleted from this version of Mach.
560 * All shared objects are now mapped directly into the respective
561 * maps. This requires a change in the copy on write strategy;
562 * the asymmetric (delayed) strategy is used for shared temporary
563 * objects instead of the symmetric (shadow) strategy. This is
564 * selected by the (new) use_shared_copy bit in the object. See
565 * vm_object_copy_temporary in vm_object.c for details. All maps
566 * are now "top level" maps (either task map, kernel map or submap
567 * of the kernel map).
568 *
569 * Since portions of maps are specified by start/end addreses,
570 * which may not align with existing map entries, all
571 * routines merely "clip" entries to these start/end values.
572 * [That is, an entry is split into two, bordering at a
573 * start or end value.] Note that these clippings may not
574 * always be necessary (as the two resulting entries are then
575 * not changed); however, the clipping is done for convenience.
576 * No attempt is currently made to "glue back together" two
577 * abutting entries.
578 *
579 * The symmetric (shadow) copy strategy implements virtual copy
580 * by copying VM object references from one map to
581 * another, and then marking both regions as copy-on-write.
582 * It is important to note that only one writeable reference
583 * to a VM object region exists in any map when this strategy
584 * is used -- this means that shadow object creation can be
585 * delayed until a write operation occurs. The asymmetric (delayed)
586 * strategy allows multiple maps to have writeable references to
587 * the same region of a vm object, and hence cannot delay creating
588 * its copy objects. See vm_object_copy_temporary() in vm_object.c.
589 * Copying of permanent objects is completely different; see
590 * vm_object_copy_strategically() in vm_object.c.
591 */
592
593 zone_t vm_map_zone; /* zone for vm_map structures */
594 zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */
595 zone_t vm_map_kentry_zone; /* zone for kernel entry structures */
596 zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */
597
598 /*
599 * Placeholder object for submap operations. This object is dropped
600 * into the range by a call to vm_map_find, and removed when
601 * vm_map_submap creates the submap.
602 */
603
604 vm_object_t vm_submap_object;
605
606 /*
607 * vm_map_init:
608 *
609 * Initialize the vm_map module. Must be called before
610 * any other vm_map routines.
611 *
612 * Map and entry structures are allocated from zones -- we must
613 * initialize those zones.
614 *
615 * There are three zones of interest:
616 *
617 * vm_map_zone: used to allocate maps.
618 * vm_map_entry_zone: used to allocate map entries.
619 * vm_map_kentry_zone: used to allocate map entries for the kernel.
620 *
621 * The kernel allocates map entries from a special zone that is initially
622 * "crammed" with memory. It would be difficult (perhaps impossible) for
623 * the kernel to allocate more memory to a entry zone when it became
624 * empty since the very act of allocating memory implies the creation
625 * of a new entry.
626 */
627
628 vm_offset_t kentry_data;
629 vm_size_t kentry_data_size;
630 int kentry_count = 256; /* to init kentry_data_size */
631
632 void vm_map_init(void)
633 {
634 vm_map_zone = zinit((vm_size_t) sizeof(struct vm_map), 40*1024,
635 PAGE_SIZE, FALSE, "maps");
636 vm_map_entry_zone = zinit((vm_size_t) sizeof(struct vm_map_entry),
637 1024*1024, PAGE_SIZE*5,
638 FALSE, "non-kernel map entries");
639 vm_map_kentry_zone = zinit((vm_size_t) sizeof(struct vm_map_entry),
640 kentry_data_size, kentry_data_size,
641 FALSE, "kernel map entries");
642
643 vm_map_copy_zone = zinit((vm_size_t) sizeof(struct vm_map_copy),
644 16*1024, PAGE_SIZE, FALSE,
645 "map copies");
646
647 /*
648 * Cram the kentry zone with initial data.
649 */
650 zcram(vm_map_kentry_zone, kentry_data, kentry_data_size);
651
652 /*
653 * Submap object is initialized by vm_object_init.
654 */
655 }
656
657 /*
658 * vm_map_create:
659 *
660 * Creates and returns a new empty VM map with
661 * the given physical map structure, and having
662 * the given lower and upper address bounds.
663 */
664 vm_map_t vm_map_create(
665 pmap_t pmap,
666 vm_offset_t min,
667 vm_offset_t max,
668 boolean_t pageable)
669 {
670 register vm_map_t result;
671
672 result = (vm_map_t) zalloc(vm_map_zone);
673 if (result == VM_MAP_NULL)
674 panic("vm_map_create");
675
676 vm_map_first_entry(result) = vm_map_to_entry(result);
677 vm_map_last_entry(result) = vm_map_to_entry(result);
678 result->hdr.nentries = 0;
679 result->hdr.entries_pageable = pageable;
680
681 result->size = 0;
682 result->ref_count = 1;
683 result->pmap = pmap;
684 result->min_offset = min;
685 result->max_offset = max;
686 result->wiring_required = FALSE;
687 result->wait_for_space = FALSE;
688 result->first_free = vm_map_to_entry(result);
689 result->hint = vm_map_to_entry(result);
690 vm_map_lock_init(result);
691 simple_lock_init(&result->ref_lock);
692 simple_lock_init(&result->hint_lock);
693
694 return result;
695 }
696
697 /*
698 * vm_map_entry_create: [ internal use only ]
699 *
700 * Allocates a VM map entry for insertion in the
701 * given map (or map copy). No fields are filled.
702 */
703 #define vm_map_entry_create(map) \
704 _vm_map_entry_create(&(map)->hdr)
705
706 #define vm_map_copy_entry_create(copy) \
707 _vm_map_entry_create(&(copy)->cpy_hdr)
708
709 vm_map_entry_t _vm_map_entry_create(
710 register struct vm_map_header *map_header)
711 {
712 register zone_t zone;
713 register vm_map_entry_t entry;
714
715 if (map_header->entries_pageable)
716 zone = vm_map_entry_zone;
717 else
718 zone = vm_map_kentry_zone;
719
720 entry = (vm_map_entry_t) zalloc(zone);
721 if (entry == VM_MAP_ENTRY_NULL)
722 panic("vm_map_entry_create");
723
724 return entry;
725 }
726
727 /*
728 * vm_map_entry_dispose: [ internal use only ]
729 *
730 * Inverse of vm_map_entry_create.
731 */
732 #define vm_map_entry_dispose(map, entry) \
733 _vm_map_entry_dispose(&(map)->hdr, (entry))
734
735 #define vm_map_copy_entry_dispose(map, entry) \
736 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
737
738 void _vm_map_entry_dispose(
739 register struct vm_map_header *map_header,
740 register vm_map_entry_t entry)
741 {
742 register zone_t zone;
743
744 if (map_header->entries_pageable)
745 zone = vm_map_entry_zone;
746 else
747 zone = vm_map_kentry_zone;
748
749 zfree(zone, (vm_offset_t) entry);
750 }
751
752 /*
753 * vm_map_entry_{un,}link:
754 *
755 * Insert/remove entries from maps (or map copies).
756 */
757 #define vm_map_entry_link(map, after_where, entry) \
758 _vm_map_entry_link(&(map)->hdr, after_where, entry)
759
760 #define vm_map_copy_entry_link(copy, after_where, entry) \
761 _vm_map_entry_link(&(copy)->cpy_hdr, after_where, entry)
762
763 #define _vm_map_entry_link(hdr, after_where, entry) \
764 MACRO_BEGIN \
765 (hdr)->nentries++; \
766 (entry)->vme_prev = (after_where); \
767 (entry)->vme_next = (after_where)->vme_next; \
768 (entry)->vme_prev->vme_next = \
769 (entry)->vme_next->vme_prev = (entry); \
770 MACRO_END
771
772 #define vm_map_entry_unlink(map, entry) \
773 _vm_map_entry_unlink(&(map)->hdr, entry)
774
775 #define vm_map_copy_entry_unlink(copy, entry) \
776 _vm_map_entry_unlink(&(copy)->cpy_hdr, entry)
777
778 #define _vm_map_entry_unlink(hdr, entry) \
779 MACRO_BEGIN \
780 (hdr)->nentries--; \
781 (entry)->vme_next->vme_prev = (entry)->vme_prev; \
782 (entry)->vme_prev->vme_next = (entry)->vme_next; \
783 MACRO_END
784
785 /*
786 * vm_map_reference:
787 *
788 * Creates another valid reference to the given map.
789 *
790 */
791 void vm_map_reference(
792 register vm_map_t map)
793 {
794 if (map == VM_MAP_NULL)
795 return;
796
797 simple_lock(&map->ref_lock);
798 map->ref_count++;
799 simple_unlock(&map->ref_lock);
800 }
801
802 /*
803 * vm_map_deallocate:
804 *
805 * Removes a reference from the specified map,
806 * destroying it if no references remain.
807 * The map should not be locked.
808 */
809 kern_return_t vm_map_delete(
810 vm_map_t map,
811 vm_offset_t start,
812 vm_offset_t end); /* forward */
813
814 void vm_map_deallocate(
815 register vm_map_t map)
816 {
817 register int c;
818
819 if (map == VM_MAP_NULL)
820 return;
821
822 simple_lock(&map->ref_lock);
823 c = --map->ref_count;
824 simple_unlock(&map->ref_lock);
825
826 if (c > 0) {
827 return;
828 }
829
830 #if NET_ATM
831 projected_buffer_collect(map);
832 #endif /* NET_ATM */
833
834 (void) vm_map_delete(map, map->min_offset, map->max_offset);
835
836 pmap_destroy(map->pmap);
837
838 zfree(vm_map_zone, (vm_offset_t) map);
839 }
840
841 /*
842 * SAVE_HINT:
843 *
844 * Saves the specified entry as the hint for
845 * future lookups. Performs necessary interlocks.
846 */
847 #define SAVE_HINT(map,value) \
848 simple_lock(&(map)->hint_lock); \
849 (map)->hint = (value); \
850 simple_unlock(&(map)->hint_lock);
851
852 /*
853 * vm_map_lookup_entry: [ internal use only ]
854 *
855 * Finds the map entry containing (or
856 * immediately preceding) the specified address
857 * in the given map; the entry is returned
858 * in the "entry" parameter. The boolean
859 * result indicates whether the address is
860 * actually contained in the map.
861 */
862 boolean_t vm_map_lookup_entry(
863 register vm_map_t map,
864 register vm_offset_t address,
865 vm_map_entry_t *entry) /* OUT */
866 {
867 register vm_map_entry_t cur;
868 register vm_map_entry_t last;
869
870 /*
871 * Start looking either from the head of the
872 * list, or from the hint.
873 */
874
875 simple_lock(&map->hint_lock);
876 cur = map->hint;
877 simple_unlock(&map->hint_lock);
878
879 if (cur == vm_map_to_entry(map))
880 cur = cur->vme_next;
881
882 if (address >= cur->vme_start) {
883 /*
884 * Go from hint to end of list.
885 *
886 * But first, make a quick check to see if
887 * we are already looking at the entry we
888 * want (which is usually the case).
889 * Note also that we don't need to save the hint
890 * here... it is the same hint (unless we are
891 * at the header, in which case the hint didn't
892 * buy us anything anyway).
893 */
894 last = vm_map_to_entry(map);
895 if ((cur != last) && (cur->vme_end > address)) {
896 *entry = cur;
897 return TRUE;
898 }
899 }
900 else {
901 /*
902 * Go from start to hint, *inclusively*
903 */
904 last = cur->vme_next;
905 cur = vm_map_first_entry(map);
906 }
907
908 /*
909 * Search linearly
910 */
911
912 while (cur != last) {
913 if (cur->vme_end > address) {
914 if (address >= cur->vme_start) {
915 /*
916 * Save this lookup for future
917 * hints, and return
918 */
919
920 *entry = cur;
921 SAVE_HINT(map, cur);
922 return TRUE;
923 }
924 break;
925 }
926 cur = cur->vme_next;
927 }
928 *entry = cur->vme_prev;
929 SAVE_HINT(map, *entry);
930 return FALSE;
931 }
932
933 #if NET_ATM
934 /*
935 * Routine: invalid_user_access
936 *
937 * Verifies whether user access is valid.
938 */
939
940 boolean_t
941 invalid_user_access(
942 vm_map_t map,
943 vm_offset_t start,
944 vm_offset_t end,
945 vm_prot_t prot)
946 {
947 vm_map_entry_t entry;
948
949 return (map == VM_MAP_NULL || map == kernel_map ||
950 !vm_map_lookup_entry(map, start, &entry) ||
951 entry->vme_end < end ||
952 (prot & ~(entry->protection)));
953 }
954 #endif /* NET_ATM */
955
956
957 /*
958 * Routine: vm_map_find_entry
959 * Purpose:
960 * Allocate a range in the specified virtual address map,
961 * returning the entry allocated for that range.
962 * Used by kmem_alloc, etc. Returns wired entries.
963 *
964 * The map must be locked.
965 *
966 * If an entry is allocated, the object/offset fields
967 * are initialized to zero. If an object is supplied,
968 * then an existing entry may be extended.
969 */
970 kern_return_t vm_map_find_entry(
971 register vm_map_t map,
972 vm_offset_t *address, /* OUT */
973 vm_size_t size,
974 vm_offset_t mask,
975 vm_object_t object,
976 vm_map_entry_t *o_entry) /* OUT */
977 {
978 register vm_map_entry_t entry, new_entry;
979 register vm_offset_t start;
980 register vm_offset_t end;
981
982 /*
983 * Look for the first possible address;
984 * if there's already something at this
985 * address, we have to start after it.
986 */
987
988 if ((entry = map->first_free) == vm_map_to_entry(map))
989 start = map->min_offset;
990 else
991 start = entry->vme_end;
992
993 /*
994 * In any case, the "entry" always precedes
995 * the proposed new region throughout the loop:
996 */
997
998 while (TRUE) {
999 register vm_map_entry_t next;
1000
1001 /*
1002 * Find the end of the proposed new region.
1003 * Be sure we didn't go beyond the end, or
1004 * wrap around the address.
1005 */
1006
1007 start = ((start + mask) & ~mask);
1008 end = start + size;
1009
1010 if ((end > map->max_offset) || (end < start))
1011 return KERN_NO_SPACE;
1012
1013 /*
1014 * If there are no more entries, we must win.
1015 */
1016
1017 next = entry->vme_next;
1018 if (next == vm_map_to_entry(map))
1019 break;
1020
1021 /*
1022 * If there is another entry, it must be
1023 * after the end of the potential new region.
1024 */
1025
1026 if (next->vme_start >= end)
1027 break;
1028
1029 /*
1030 * Didn't fit -- move to the next entry.
1031 */
1032
1033 entry = next;
1034 start = entry->vme_end;
1035 }
1036
1037 /*
1038 * At this point,
1039 * "start" and "end" should define the endpoints of the
1040 * available new range, and
1041 * "entry" should refer to the region before the new
1042 * range, and
1043 *
1044 * the map should be locked.
1045 */
1046
1047 *address = start;
1048
1049 /*
1050 * See whether we can avoid creating a new entry by
1051 * extending one of our neighbors. [So far, we only attempt to
1052 * extend from below.]
1053 */
1054
1055 if ((object != VM_OBJECT_NULL) &&
1056 (entry != vm_map_to_entry(map)) &&
1057 (entry->vme_end == start) &&
1058 (!entry->is_shared) &&
1059 (!entry->is_sub_map) &&
1060 (entry->object.vm_object == object) &&
1061 (entry->needs_copy == FALSE) &&
1062 (entry->inheritance == VM_INHERIT_DEFAULT) &&
1063 (entry->protection == VM_PROT_DEFAULT) &&
1064 (entry->max_protection == VM_PROT_ALL) &&
1065 (entry->wired_count == 1) &&
1066 (entry->user_wired_count == 0)
1067 #if NET_ATM
1068 && (entry->projected_on == 0)
1069 #endif /* NET_ATM */
1070 )
1071 { /*
1072 * Because this is a special case,
1073 * we don't need to use vm_object_coalesce.
1074 */
1075
1076 entry->vme_end = end;
1077 new_entry = entry;
1078 } else {
1079 new_entry = vm_map_entry_create(map);
1080
1081 new_entry->vme_start = start;
1082 new_entry->vme_end = end;
1083
1084 new_entry->is_shared = FALSE;
1085 new_entry->is_sub_map = FALSE;
1086 new_entry->object.vm_object = VM_OBJECT_NULL;
1087 new_entry->offset = (vm_offset_t) 0;
1088
1089 new_entry->needs_copy = FALSE;
1090
1091 new_entry->inheritance = VM_INHERIT_DEFAULT;
1092 new_entry->protection = VM_PROT_DEFAULT;
1093 new_entry->max_protection = VM_PROT_ALL;
1094 new_entry->wired_count = 1;
1095 new_entry->user_wired_count = 0;
1096
1097 new_entry->in_transition = FALSE;
1098 new_entry->needs_wakeup = FALSE;
1099 #if NET_ATM
1100 new_entry->projected_on = 0;
1101 #endif /* NET_ATM */
1102
1103 /*
1104 * Insert the new entry into the list
1105 */
1106
1107 vm_map_entry_link(map, entry, new_entry);
1108 }
1109
1110 map->size += size;
1111
1112 /*
1113 * Update the free space hint and the lookup hint
1114 */
1115
1116 map->first_free = new_entry;
1117 SAVE_HINT(map, new_entry);
1118
1119 *o_entry = new_entry;
1120 return KERN_SUCCESS;
1121 }
1122
1123 int vm_map_pmap_enter_print = FALSE;
1124 int vm_map_pmap_enter_enable = FALSE;
1125
1126 /*
1127 * Routine: vm_map_pmap_enter
1128 *
1129 * Description:
1130 * Force pages from the specified object to be entered into
1131 * the pmap at the specified address if they are present.
1132 * As soon as a page not found in the object the scan ends.
1133 *
1134 * Returns:
1135 * Nothing.
1136 *
1137 * In/out conditions:
1138 * The source map should not be locked on entry.
1139 */
1140 void
1141 vm_map_pmap_enter(
1142 vm_map_t map,
1143 register
1144 vm_offset_t addr,
1145 register
1146 vm_offset_t end_addr,
1147 register
1148 vm_object_t object,
1149 vm_offset_t offset,
1150 vm_prot_t protection)
1151 {
1152 while (addr < end_addr) {
1153 register vm_page_t m;
1154
1155 vm_object_lock(object);
1156 vm_object_paging_begin(object);
1157
1158 m = vm_page_lookup(object, offset);
1159 if (m == VM_PAGE_NULL || m->absent) {
1160 vm_object_paging_end(object);
1161 vm_object_unlock(object);
1162 return;
1163 }
1164
1165 if (vm_map_pmap_enter_print) {
1166 printf("vm_map_pmap_enter:");
1167 printf("map: %#x, addr: %#x, object: %#x, offset: %#x\n",
1168 (vm_offset_t) map, addr,
1169 (vm_offset_t) object, offset);
1170 }
1171
1172 m->busy = TRUE;
1173 vm_object_unlock(object);
1174
1175 PMAP_ENTER(map->pmap, addr, m,
1176 protection, FALSE);
1177
1178 vm_object_lock(object);
1179 PAGE_WAKEUP_DONE(m);
1180 vm_page_lock_queues();
1181 if (!m->active && !m->inactive)
1182 vm_page_activate(m);
1183 vm_page_unlock_queues();
1184 vm_object_paging_end(object);
1185 vm_object_unlock(object);
1186
1187 offset += PAGE_SIZE;
1188 addr += PAGE_SIZE;
1189 }
1190 }
1191
1192 /*
1193 * Routine: vm_map_enter
1194 *
1195 * Description:
1196 * Allocate a range in the specified virtual address map.
1197 * The resulting range will refer to memory defined by
1198 * the given memory object and offset into that object.
1199 *
1200 * Arguments are as defined in the vm_map call.
1201 */
1202 kern_return_t vm_map_enter(
1203 register
1204 vm_map_t map,
1205 vm_offset_t *address, /* IN/OUT */
1206 vm_size_t size,
1207 vm_offset_t mask,
1208 boolean_t anywhere,
1209 vm_object_t object,
1210 vm_offset_t offset,
1211 boolean_t needs_copy,
1212 vm_prot_t cur_protection,
1213 vm_prot_t max_protection,
1214 vm_inherit_t inheritance)
1215 {
1216 register vm_map_entry_t entry;
1217 register vm_offset_t start;
1218 register vm_offset_t end;
1219 kern_return_t result = KERN_SUCCESS;
1220
1221 #define RETURN(value) { result = value; goto BailOut; }
1222
1223 StartAgain: ;
1224
1225 start = *address;
1226
1227 if (anywhere) {
1228 vm_map_lock(map);
1229
1230 /*
1231 * Calculate the first possible address.
1232 */
1233
1234 if (start < map->min_offset)
1235 start = map->min_offset;
1236 if (start > map->max_offset)
1237 RETURN(KERN_NO_SPACE);
1238
1239 /*
1240 * Look for the first possible address;
1241 * if there's already something at this
1242 * address, we have to start after it.
1243 */
1244
1245 if (start == map->min_offset) {
1246 if ((entry = map->first_free) != vm_map_to_entry(map))
1247 start = entry->vme_end;
1248 } else {
1249 vm_map_entry_t tmp_entry;
1250 if (vm_map_lookup_entry(map, start, &tmp_entry))
1251 start = tmp_entry->vme_end;
1252 entry = tmp_entry;
1253 }
1254
1255 /*
1256 * In any case, the "entry" always precedes
1257 * the proposed new region throughout the
1258 * loop:
1259 */
1260
1261 while (TRUE) {
1262 register vm_map_entry_t next;
1263
1264 /*
1265 * Find the end of the proposed new region.
1266 * Be sure we didn't go beyond the end, or
1267 * wrap around the address.
1268 */
1269
1270 start = ((start + mask) & ~mask);
1271 end = start + size;
1272
1273 if ((end > map->max_offset) || (end < start)) {
1274 if (map->wait_for_space) {
1275 if (size <= (map->max_offset -
1276 map->min_offset)) {
1277 assert_wait((event_t) map, TRUE);
1278 vm_map_unlock(map);
1279 thread_block(CONTINUE_NULL);
1280 goto StartAgain;
1281 }
1282 }
1283
1284 RETURN(KERN_NO_SPACE);
1285 }
1286
1287 /*
1288 * If there are no more entries, we must win.
1289 */
1290
1291 next = entry->vme_next;
1292 if (next == vm_map_to_entry(map))
1293 break;
1294
1295 /*
1296 * If there is another entry, it must be
1297 * after the end of the potential new region.
1298 */
1299
1300 if (next->vme_start >= end)
1301 break;
1302
1303 /*
1304 * Didn't fit -- move to the next entry.
1305 */
1306
1307 entry = next;
1308 start = entry->vme_end;
1309 }
1310 *address = start;
1311 } else {
1312 vm_map_entry_t temp_entry;
1313
1314 /*
1315 * Verify that:
1316 * the address doesn't itself violate
1317 * the mask requirement.
1318 */
1319
1320 if ((start & mask) != 0)
1321 return(KERN_NO_SPACE);
1322
1323 vm_map_lock(map);
1324
1325 /*
1326 * ... the address is within bounds
1327 */
1328
1329 end = start + size;
1330
1331 if ((start < map->min_offset) ||
1332 (end > map->max_offset) ||
1333 (start >= end)) {
1334 RETURN(KERN_INVALID_ADDRESS);
1335 }
1336
1337 /*
1338 * ... the starting address isn't allocated
1339 */
1340
1341 if (vm_map_lookup_entry(map, start, &temp_entry))
1342 RETURN(KERN_NO_SPACE);
1343
1344 entry = temp_entry;
1345
1346 /*
1347 * ... the next region doesn't overlap the
1348 * end point.
1349 */
1350
1351 if ((entry->vme_next != vm_map_to_entry(map)) &&
1352 (entry->vme_next->vme_start < end))
1353 RETURN(KERN_NO_SPACE);
1354 }
1355
1356 /*
1357 * At this point,
1358 * "start" and "end" should define the endpoints of the
1359 * available new range, and
1360 * "entry" should refer to the region before the new
1361 * range, and
1362 *
1363 * the map should be locked.
1364 */
1365
1366 /*
1367 * See whether we can avoid creating a new entry (and object) by
1368 * extending one of our neighbors. [So far, we only attempt to
1369 * extend from below.]
1370 */
1371
1372 if ((object == VM_OBJECT_NULL) &&
1373 (entry != vm_map_to_entry(map)) &&
1374 (entry->vme_end == start) &&
1375 (!entry->is_shared) &&
1376 (!entry->is_sub_map) &&
1377 (entry->inheritance == inheritance) &&
1378 (entry->protection == cur_protection) &&
1379 (entry->max_protection == max_protection) &&
1380 (entry->wired_count == 0) /* implies user_wired_count == 0 */
1381 #if NET_ATM
1382 && (entry->projected_on == 0)
1383 #endif /* NET_ATM */
1384 )
1385 {
1386 if (vm_object_coalesce(entry->object.vm_object,
1387 VM_OBJECT_NULL,
1388 entry->offset,
1389 (vm_offset_t) 0,
1390 (vm_size_t)(entry->vme_end - entry->vme_start),
1391 (vm_size_t)(end - entry->vme_end))) {
1392
1393 /*
1394 * Coalesced the two objects - can extend
1395 * the previous map entry to include the
1396 * new range.
1397 */
1398 map->size += (end - entry->vme_end);
1399 entry->vme_end = end;
1400 RETURN(KERN_SUCCESS);
1401 }
1402 }
1403
1404 /*
1405 * Create a new entry
1406 */
1407
1408 /**/ {
1409 register vm_map_entry_t new_entry;
1410
1411 new_entry = vm_map_entry_create(map);
1412
1413 new_entry->vme_start = start;
1414 new_entry->vme_end = end;
1415
1416 new_entry->is_shared = FALSE;
1417 new_entry->is_sub_map = FALSE;
1418 new_entry->object.vm_object = object;
1419 new_entry->offset = offset;
1420
1421 new_entry->needs_copy = needs_copy;
1422
1423 new_entry->inheritance = inheritance;
1424 new_entry->protection = cur_protection;
1425 new_entry->max_protection = max_protection;
1426 new_entry->wired_count = 0;
1427 new_entry->user_wired_count = 0;
1428
1429 new_entry->in_transition = FALSE;
1430 new_entry->needs_wakeup = FALSE;
1431 #if NET_ATM
1432 new_entry->projected_on = 0;
1433 #endif /* NET_ATM */
1434
1435 /*
1436 * Insert the new entry into the list
1437 */
1438
1439 vm_map_entry_link(map, entry, new_entry);
1440 map->size += size;
1441
1442 /*
1443 * Update the free space hint and the lookup hint
1444 */
1445
1446 if ((map->first_free == entry) &&
1447 (entry->vme_end >= new_entry->vme_start))
1448 map->first_free = new_entry;
1449
1450 SAVE_HINT(map, new_entry);
1451
1452 vm_map_unlock(map);
1453
1454 if ((object != VM_OBJECT_NULL) &&
1455 (vm_map_pmap_enter_enable) &&
1456 (!anywhere) &&
1457 (!needs_copy) &&
1458 (size < (128*1024))) {
1459 vm_map_pmap_enter(map, start, end,
1460 object, offset, cur_protection);
1461 }
1462
1463 return result;
1464 /**/ }
1465
1466 BailOut: ;
1467
1468 vm_map_unlock(map);
1469 return result;
1470
1471 #undef RETURN
1472 }
1473
1474 /*
1475 * vm_map_clip_start: [ internal use only ]
1476 *
1477 * Asserts that the given entry begins at or after
1478 * the specified address; if necessary,
1479 * it splits the entry into two.
1480 */
1481 #define vm_map_clip_start(map, entry, startaddr) \
1482 MACRO_BEGIN \
1483 if ((startaddr) > (entry)->vme_start) \
1484 _vm_map_clip_start(&(map)->hdr,(entry),(startaddr)); \
1485 MACRO_END
1486
1487 #define vm_map_copy_clip_start(copy, entry, startaddr) \
1488 MACRO_BEGIN \
1489 if ((startaddr) > (entry)->vme_start) \
1490 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
1491 MACRO_END
1492
1493 /*
1494 * This routine is called only when it is known that
1495 * the entry must be split.
1496 */
1497 void _vm_map_clip_start(
1498 register struct vm_map_header *map_header,
1499 register vm_map_entry_t entry,
1500 register vm_offset_t start)
1501 {
1502 register vm_map_entry_t new_entry;
1503
1504 /*
1505 * Split off the front portion --
1506 * note that we must insert the new
1507 * entry BEFORE this one, so that
1508 * this entry has the specified starting
1509 * address.
1510 */
1511
1512 new_entry = _vm_map_entry_create(map_header);
1513 vm_map_entry_copy_full(new_entry, entry);
1514
1515 new_entry->vme_end = start;
1516 entry->offset += (start - entry->vme_start);
1517 entry->vme_start = start;
1518
1519 _vm_map_entry_link(map_header, entry->vme_prev, new_entry);
1520
1521 if (entry->is_sub_map)
1522 vm_map_reference(new_entry->object.sub_map);
1523 else
1524 vm_object_reference(new_entry->object.vm_object);
1525 }
1526
1527 /*
1528 * vm_map_clip_end: [ internal use only ]
1529 *
1530 * Asserts that the given entry ends at or before
1531 * the specified address; if necessary,
1532 * it splits the entry into two.
1533 */
1534 #define vm_map_clip_end(map, entry, endaddr) \
1535 MACRO_BEGIN \
1536 if ((endaddr) < (entry)->vme_end) \
1537 _vm_map_clip_end(&(map)->hdr,(entry),(endaddr)); \
1538 MACRO_END
1539
1540 #define vm_map_copy_clip_end(copy, entry, endaddr) \
1541 MACRO_BEGIN \
1542 if ((endaddr) < (entry)->vme_end) \
1543 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
1544 MACRO_END
1545
1546 /*
1547 * This routine is called only when it is known that
1548 * the entry must be split.
1549 */
1550 void _vm_map_clip_end(
1551 register struct vm_map_header *map_header,
1552 register vm_map_entry_t entry,
1553 register vm_offset_t end)
1554 {
1555 register vm_map_entry_t new_entry;
1556
1557 /*
1558 * Create a new entry and insert it
1559 * AFTER the specified entry
1560 */
1561
1562 new_entry = _vm_map_entry_create(map_header);
1563 vm_map_entry_copy_full(new_entry, entry);
1564
1565 new_entry->vme_start = entry->vme_end = end;
1566 new_entry->offset += (end - entry->vme_start);
1567
1568 _vm_map_entry_link(map_header, entry, new_entry);
1569
1570 if (entry->is_sub_map)
1571 vm_map_reference(new_entry->object.sub_map);
1572 else
1573 vm_object_reference(new_entry->object.vm_object);
1574 }
1575
1576 /*
1577 * VM_MAP_RANGE_CHECK: [ internal use only ]
1578 *
1579 * Asserts that the starting and ending region
1580 * addresses fall within the valid range of the map.
1581 */
1582 #define VM_MAP_RANGE_CHECK(map, start, end) \
1583 MACRO_BEGIN \
1584 if (start < vm_map_min(map)) \
1585 start = vm_map_min(map); \
1586 if (end > vm_map_max(map)) \
1587 end = vm_map_max(map); \
1588 if (start > end) \
1589 start = end; \
1590 MACRO_END
1591
1592 /*
1593 * vm_map_submap: [ kernel use only ]
1594 *
1595 * Mark the given range as handled by a subordinate map.
1596 *
1597 * This range must have been created with vm_map_find using
1598 * the vm_submap_object, and no other operations may have been
1599 * performed on this range prior to calling vm_map_submap.
1600 *
1601 * Only a limited number of operations can be performed
1602 * within this rage after calling vm_map_submap:
1603 * vm_fault
1604 * [Don't try vm_map_copyin!]
1605 *
1606 * To remove a submapping, one must first remove the
1607 * range from the superior map, and then destroy the
1608 * submap (if desired). [Better yet, don't try it.]
1609 */
1610 kern_return_t vm_map_submap(
1611 register vm_map_t map,
1612 register vm_offset_t start,
1613 register vm_offset_t end,
1614 vm_map_t submap)
1615 {
1616 vm_map_entry_t entry;
1617 register kern_return_t result = KERN_INVALID_ARGUMENT;
1618 register vm_object_t object;
1619
1620 vm_map_lock(map);
1621
1622 VM_MAP_RANGE_CHECK(map, start, end);
1623
1624 if (vm_map_lookup_entry(map, start, &entry)) {
1625 vm_map_clip_start(map, entry, start);
1626 }
1627 else
1628 entry = entry->vme_next;
1629
1630 vm_map_clip_end(map, entry, end);
1631
1632 if ((entry->vme_start == start) && (entry->vme_end == end) &&
1633 (!entry->is_sub_map) &&
1634 ((object = entry->object.vm_object) == vm_submap_object) &&
1635 (object->resident_page_count == 0) &&
1636 (object->copy == VM_OBJECT_NULL) &&
1637 (object->shadow == VM_OBJECT_NULL) &&
1638 (!object->pager_created)) {
1639 entry->object.vm_object = VM_OBJECT_NULL;
1640 vm_object_deallocate(object);
1641 entry->is_sub_map = TRUE;
1642 vm_map_reference(entry->object.sub_map = submap);
1643 result = KERN_SUCCESS;
1644 }
1645 vm_map_unlock(map);
1646
1647 return result;
1648 }
1649
1650 /*
1651 * vm_map_protect:
1652 *
1653 * Sets the protection of the specified address
1654 * region in the target map. If "set_max" is
1655 * specified, the maximum protection is to be set;
1656 * otherwise, only the current protection is affected.
1657 */
1658 kern_return_t vm_map_protect(
1659 register vm_map_t map,
1660 register vm_offset_t start,
1661 register vm_offset_t end,
1662 register vm_prot_t new_prot,
1663 register boolean_t set_max)
1664 {
1665 register vm_map_entry_t current;
1666 vm_map_entry_t entry;
1667
1668 vm_map_lock(map);
1669
1670 VM_MAP_RANGE_CHECK(map, start, end);
1671
1672 if (vm_map_lookup_entry(map, start, &entry)) {
1673 vm_map_clip_start(map, entry, start);
1674 }
1675 else
1676 entry = entry->vme_next;
1677
1678 /*
1679 * Make a first pass to check for protection
1680 * violations.
1681 */
1682
1683 current = entry;
1684 while ((current != vm_map_to_entry(map)) &&
1685 (current->vme_start < end)) {
1686
1687 if (current->is_sub_map) {
1688 vm_map_unlock(map);
1689 return KERN_INVALID_ARGUMENT;
1690 }
1691 if ((new_prot & current->max_protection) != new_prot) {
1692 vm_map_unlock(map);
1693 return KERN_PROTECTION_FAILURE;
1694 }
1695
1696 current = current->vme_next;
1697 }
1698
1699 /*
1700 * Go back and fix up protections.
1701 * [Note that clipping is not necessary the second time.]
1702 */
1703
1704 current = entry;
1705
1706 while ((current != vm_map_to_entry(map)) &&
1707 (current->vme_start < end)) {
1708
1709 vm_prot_t old_prot;
1710
1711 vm_map_clip_end(map, current, end);
1712
1713 old_prot = current->protection;
1714 if (set_max)
1715 current->protection =
1716 (current->max_protection = new_prot) &
1717 old_prot;
1718 else
1719 current->protection = new_prot;
1720
1721 /*
1722 * Update physical map if necessary.
1723 */
1724
1725 if (current->protection != old_prot) {
1726 pmap_protect(map->pmap, current->vme_start,
1727 current->vme_end,
1728 current->protection);
1729 }
1730 current = current->vme_next;
1731 }
1732
1733 vm_map_unlock(map);
1734 return KERN_SUCCESS;
1735 }
1736
1737 /*
1738 * vm_map_inherit:
1739 *
1740 * Sets the inheritance of the specified address
1741 * range in the target map. Inheritance
1742 * affects how the map will be shared with
1743 * child maps at the time of vm_map_fork.
1744 */
1745 kern_return_t vm_map_inherit(
1746 register vm_map_t map,
1747 register vm_offset_t start,
1748 register vm_offset_t end,
1749 register vm_inherit_t new_inheritance)
1750 {
1751 register vm_map_entry_t entry;
1752 vm_map_entry_t temp_entry;
1753
1754 vm_map_lock(map);
1755
1756 VM_MAP_RANGE_CHECK(map, start, end);
1757
1758 if (vm_map_lookup_entry(map, start, &temp_entry)) {
1759 entry = temp_entry;
1760 vm_map_clip_start(map, entry, start);
1761 }
1762 else
1763 entry = temp_entry->vme_next;
1764
1765 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
1766 vm_map_clip_end(map, entry, end);
1767
1768 entry->inheritance = new_inheritance;
1769
1770 entry = entry->vme_next;
1771 }
1772
1773 vm_map_unlock(map);
1774 return KERN_SUCCESS;
1775 }
1776
1777 /*
1778 * vm_map_pageable_common:
1779 *
1780 * Sets the pageability of the specified address
1781 * range in the target map. Regions specified
1782 * as not pageable require locked-down physical
1783 * memory and physical page maps. access_type indicates
1784 * types of accesses that must not generate page faults.
1785 * This is checked against protection of memory being locked-down.
1786 * access_type of VM_PROT_NONE makes memory pageable.
1787 *
1788 * The map must not be locked, but a reference
1789 * must remain to the map throughout the call.
1790 *
1791 * Callers should use macros in vm/vm_map.h (i.e. vm_map_pageable,
1792 * or vm_map_pageable_user); don't call vm_map_pageable directly.
1793 */
1794 kern_return_t vm_map_pageable_common(
1795 register vm_map_t map,
1796 register vm_offset_t start,
1797 register vm_offset_t end,
1798 register vm_prot_t access_type,
1799 boolean_t user_wire)
1800 {
1801 register vm_map_entry_t entry;
1802 vm_map_entry_t start_entry;
1803
1804 vm_map_lock(map);
1805
1806 VM_MAP_RANGE_CHECK(map, start, end);
1807
1808 if (vm_map_lookup_entry(map, start, &start_entry)) {
1809 entry = start_entry;
1810 /*
1811 * vm_map_clip_start will be done later.
1812 */
1813 }
1814 else {
1815 /*
1816 * Start address is not in map; this is fatal.
1817 */
1818 vm_map_unlock(map);
1819 return KERN_FAILURE;
1820 }
1821
1822 /*
1823 * Actions are rather different for wiring and unwiring,
1824 * so we have two separate cases.
1825 */
1826
1827 if (access_type == VM_PROT_NONE) {
1828
1829 vm_map_clip_start(map, entry, start);
1830
1831 /*
1832 * Unwiring. First ensure that the range to be
1833 * unwired is really wired down.
1834 */
1835 while ((entry != vm_map_to_entry(map)) &&
1836 (entry->vme_start < end)) {
1837
1838 if ((entry->wired_count == 0) ||
1839 ((entry->vme_end < end) &&
1840 ((entry->vme_next == vm_map_to_entry(map)) ||
1841 (entry->vme_next->vme_start > entry->vme_end))) ||
1842 (user_wire && (entry->user_wired_count == 0))) {
1843 vm_map_unlock(map);
1844 return KERN_INVALID_ARGUMENT;
1845 }
1846 entry = entry->vme_next;
1847 }
1848
1849 /*
1850 * Now decrement the wiring count for each region.
1851 * If a region becomes completely unwired,
1852 * unwire its physical pages and mappings.
1853 */
1854 entry = start_entry;
1855 while ((entry != vm_map_to_entry(map)) &&
1856 (entry->vme_start < end)) {
1857 vm_map_clip_end(map, entry, end);
1858
1859 if (user_wire) {
1860 if (--(entry->user_wired_count) == 0)
1861 entry->wired_count--;
1862 }
1863 else {
1864 entry->wired_count--;
1865 }
1866
1867 if (entry->wired_count == 0)
1868 vm_fault_unwire(map, entry);
1869
1870 entry = entry->vme_next;
1871 }
1872 }
1873
1874 else {
1875 /*
1876 * Wiring. We must do this in two passes:
1877 *
1878 * 1. Holding the write lock, we create any shadow
1879 * or zero-fill objects that need to be created.
1880 * Then we clip each map entry to the region to be
1881 * wired and increment its wiring count. We
1882 * create objects before clipping the map entries
1883 * to avoid object proliferation.
1884 *
1885 * 2. We downgrade to a read lock, and call
1886 * vm_fault_wire to fault in the pages for any
1887 * newly wired area (wired_count is 1).
1888 *
1889 * Downgrading to a read lock for vm_fault_wire avoids
1890 * a possible deadlock with another thread that may have
1891 * faulted on one of the pages to be wired (it would mark
1892 * the page busy, blocking us, then in turn block on the
1893 * map lock that we hold). Because of problems in the
1894 * recursive lock package, we cannot upgrade to a write
1895 * lock in vm_map_lookup. Thus, any actions that require
1896 * the write lock must be done beforehand. Because we
1897 * keep the read lock on the map, the copy-on-write
1898 * status of the entries we modify here cannot change.
1899 */
1900
1901 /*
1902 * Pass 1.
1903 */
1904 while ((entry != vm_map_to_entry(map)) &&
1905 (entry->vme_start < end)) {
1906 vm_map_clip_end(map, entry, end);
1907
1908 if (entry->wired_count == 0) {
1909
1910 /*
1911 * Perform actions of vm_map_lookup that need
1912 * the write lock on the map: create a shadow
1913 * object for a copy-on-write region, or an
1914 * object for a zero-fill region.
1915 */
1916 if (entry->needs_copy &&
1917 ((entry->protection & VM_PROT_WRITE) != 0)) {
1918
1919 vm_object_shadow(&entry->object.vm_object,
1920 &entry->offset,
1921 (vm_size_t)(entry->vme_end
1922 - entry->vme_start));
1923 entry->needs_copy = FALSE;
1924 }
1925 if (entry->object.vm_object == VM_OBJECT_NULL) {
1926 entry->object.vm_object =
1927 vm_object_allocate(
1928 (vm_size_t)(entry->vme_end
1929 - entry->vme_start));
1930 entry->offset = (vm_offset_t)0;
1931 }
1932 }
1933 vm_map_clip_start(map, entry, start);
1934 vm_map_clip_end(map, entry, end);
1935
1936 if (user_wire) {
1937 if ((entry->user_wired_count)++ == 0)
1938 entry->wired_count++;
1939 }
1940 else {
1941 entry->wired_count++;
1942 }
1943
1944 /*
1945 * Check for holes and protection mismatch.
1946 * Holes: Next entry should be contiguous unless
1947 * this is the end of the region.
1948 * Protection: Access requested must be allowed.
1949 */
1950 if (((entry->vme_end < end) &&
1951 ((entry->vme_next == vm_map_to_entry(map)) ||
1952 (entry->vme_next->vme_start > entry->vme_end))) ||
1953 ((entry->protection & access_type) != access_type)) {
1954 /*
1955 * Found a hole or protection problem.
1956 * Object creation actions
1957 * do not need to be undone, but the
1958 * wired counts need to be restored.
1959 */
1960 while ((entry != vm_map_to_entry(map)) &&
1961 (entry->vme_end > start)) {
1962 if (user_wire) {
1963 if (--(entry->user_wired_count) == 0)
1964 entry->wired_count--;
1965 }
1966 else {
1967 entry->wired_count--;
1968 }
1969
1970 entry = entry->vme_prev;
1971 }
1972
1973 vm_map_unlock(map);
1974 return KERN_FAILURE;
1975 }
1976 entry = entry->vme_next;
1977 }
1978
1979 /*
1980 * Pass 2.
1981 */
1982
1983 /*
1984 * HACK HACK HACK HACK
1985 *
1986 * If we are wiring in the kernel map or a submap of it,
1987 * unlock the map to avoid deadlocks. We trust that the
1988 * kernel threads are well-behaved, and therefore will
1989 * not do anything destructive to this region of the map
1990 * while we have it unlocked. We cannot trust user threads
1991 * to do the same.
1992 *
1993 * HACK HACK HACK HACK
1994 */
1995 if (vm_map_pmap(map) == kernel_pmap) {
1996 vm_map_unlock(map); /* trust me ... */
1997 }
1998 else {
1999 vm_map_lock_set_recursive(map);
2000 vm_map_lock_write_to_read(map);
2001 }
2002
2003 entry = start_entry;
2004 while (entry != vm_map_to_entry(map) &&
2005 entry->vme_start < end) {
2006 /*
2007 * Wiring cases:
2008 * Kernel: wired == 1 && user_wired == 0
2009 * User: wired == 1 && user_wired == 1
2010 *
2011 * Don't need to wire if either is > 1. wired = 0 &&
2012 * user_wired == 1 can't happen.
2013 */
2014
2015 /*
2016 * XXX This assumes that the faults always succeed.
2017 */
2018 if ((entry->wired_count == 1) &&
2019 (entry->user_wired_count <= 1)) {
2020 vm_fault_wire(map, entry);
2021 }
2022 entry = entry->vme_next;
2023 }
2024
2025 if (vm_map_pmap(map) == kernel_pmap) {
2026 vm_map_lock(map);
2027 }
2028 else {
2029 vm_map_lock_clear_recursive(map);
2030 }
2031 }
2032
2033 vm_map_unlock(map);
2034
2035 return KERN_SUCCESS;
2036 }
2037
2038 /*
2039 * vm_map_entry_delete: [ internal use only ]
2040 *
2041 * Deallocate the given entry from the target map.
2042 */
2043 void vm_map_entry_delete(
2044 register vm_map_t map,
2045 register vm_map_entry_t entry)
2046 {
2047 register vm_offset_t s, e;
2048 register vm_object_t object;
2049 extern vm_object_t kernel_object;
2050
2051 s = entry->vme_start;
2052 e = entry->vme_end;
2053
2054 #if NET_ATM
2055 /*Check if projected buffer*/
2056 if (map != kernel_map && entry->projected_on != 0) {
2057 /*Check if projected kernel entry is persistent;
2058 may only manipulate directly if it is*/
2059 if (entry->projected_on->projected_on == 0)
2060 entry->wired_count = 0; /*Avoid unwire fault*/
2061 else
2062 return;
2063 }
2064 #endif /* NET_ATM */
2065
2066 /*
2067 * Get the object. Null objects cannot have pmap entries.
2068 */
2069
2070 if ((object = entry->object.vm_object) != VM_OBJECT_NULL) {
2071
2072 /*
2073 * Unwire before removing addresses from the pmap;
2074 * otherwise, unwiring will put the entries back in
2075 * the pmap.
2076 */
2077
2078 if (entry->wired_count != 0) {
2079 vm_fault_unwire(map, entry);
2080 entry->wired_count = 0;
2081 entry->user_wired_count = 0;
2082 }
2083
2084 /*
2085 * If the object is shared, we must remove
2086 * *all* references to this data, since we can't
2087 * find all of the physical maps which are sharing
2088 * it.
2089 */
2090
2091 if (object == kernel_object) {
2092 vm_object_lock(object);
2093 vm_object_page_remove(object, entry->offset,
2094 entry->offset + (e - s));
2095 vm_object_unlock(object);
2096 } else if (entry->is_shared) {
2097 vm_object_pmap_remove(object,
2098 entry->offset,
2099 entry->offset + (e - s));
2100 }
2101 else {
2102 pmap_remove(map->pmap, s, e);
2103 }
2104 }
2105
2106 /*
2107 * Deallocate the object only after removing all
2108 * pmap entries pointing to its pages.
2109 */
2110
2111 if (entry->is_sub_map)
2112 vm_map_deallocate(entry->object.sub_map);
2113 else
2114 vm_object_deallocate(entry->object.vm_object);
2115
2116 vm_map_entry_unlink(map, entry);
2117 map->size -= e - s;
2118
2119 vm_map_entry_dispose(map, entry);
2120 }
2121
2122 /*
2123 * vm_map_delete: [ internal use only ]
2124 *
2125 * Deallocates the given address range from the target
2126 * map.
2127 */
2128
2129 kern_return_t vm_map_delete(
2130 register vm_map_t map,
2131 register vm_offset_t start,
2132 register vm_offset_t end)
2133 {
2134 vm_map_entry_t entry;
2135 vm_map_entry_t first_entry;
2136
2137 /*
2138 * Find the start of the region, and clip it
2139 */
2140
2141 if (!vm_map_lookup_entry(map, start, &first_entry))
2142 entry = first_entry->vme_next;
2143 else {
2144 entry = first_entry;
2145 #if NORMA_IPC_xxx
2146 /*
2147 * XXX Had to disable this code because:
2148
2149 _vm_map_delete(c0804b78,c2198000,c219a000,0,c219a000)+df
2150 [vm/vm_map.c:2007]
2151 _vm_map_remove(c0804b78,c2198000,c219a000,c0817834,
2152 c081786c)+42 [vm/vm_map.c:2094]
2153 _kmem_io_map_deallocate(c0804b78,c2198000,2000,c0817834,
2154 c081786c)+43 [vm/vm_kern.c:818]
2155 _device_write_dealloc(c081786c)+117 [device/ds_routines.c:814]
2156 _ds_write_done(c081786c,0)+2e [device/ds_routines.c:848]
2157 _io_done_thread_continue(c08150c0,c21d4e14,c21d4e30,c08150c0,
2158 c080c114)+14 [device/ds_routines.c:1350]
2159
2160 */
2161 if (start > entry->vme_start
2162 && end == entry->vme_end
2163 && ! entry->wired_count /* XXX ??? */
2164 && ! entry->is_shared
2165 #if NET_ATM
2166 && ! entry->projected_on
2167 #endif /* NET_ATM */
2168 && ! entry->is_sub_map) {
2169 extern vm_object_t kernel_object;
2170 register vm_object_t object = entry->object.vm_object;
2171
2172 /*
2173 * The region to be deleted lives at the end
2174 * of this entry, and thus all we have to do is
2175 * truncate the entry.
2176 *
2177 * This special case is necessary if we want
2178 * coalescing to do us any good.
2179 *
2180 * XXX Do we have to adjust object size?
2181 */
2182 if (object == kernel_object) {
2183 vm_object_lock(object);
2184 vm_object_page_remove(object,
2185 entry->offset + start,
2186 entry->offset +
2187 (end - start));
2188 vm_object_unlock(object);
2189 } else if (entry->is_shared) {
2190 vm_object_pmap_remove(object,
2191 entry->offset + start,
2192 entry->offset +
2193 (end - start));
2194 } else {
2195 pmap_remove(map->pmap, start, end);
2196 }
2197 object->size -= (end - start); /* XXX */
2198
2199 entry->vme_end = start;
2200 map->size -= (end - start);
2201
2202 if (map->wait_for_space) {
2203 thread_wakeup((event_t) map);
2204 }
2205 return KERN_SUCCESS;
2206 }
2207 #endif /* NORMA_IPC */
2208 vm_map_clip_start(map, entry, start);
2209
2210 /*
2211 * Fix the lookup hint now, rather than each
2212 * time though the loop.
2213 */
2214
2215 SAVE_HINT(map, entry->vme_prev);
2216 }
2217
2218 /*
2219 * Save the free space hint
2220 */
2221
2222 if (map->first_free->vme_start >= start)
2223 map->first_free = entry->vme_prev;
2224
2225 /*
2226 * Step through all entries in this region
2227 */
2228
2229 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
2230 vm_map_entry_t next;
2231
2232 vm_map_clip_end(map, entry, end);
2233
2234 /*
2235 * If the entry is in transition, we must wait
2236 * for it to exit that state. It could be clipped
2237 * while we leave the map unlocked.
2238 */
2239 if(entry->in_transition) {
2240 /*
2241 * Say that we are waiting, and wait for entry.
2242 */
2243 entry->needs_wakeup = TRUE;
2244 vm_map_entry_wait(map, FALSE);
2245 vm_map_lock(map);
2246
2247 /*
2248 * The entry could have been clipped or it
2249 * may not exist anymore. look it up again.
2250 */
2251 if(!vm_map_lookup_entry(map, start, &entry)) {
2252 entry = entry->vme_next;
2253 }
2254 continue;
2255 }
2256
2257 next = entry->vme_next;
2258
2259 vm_map_entry_delete(map, entry);
2260 entry = next;
2261 }
2262
2263 if (map->wait_for_space)
2264 thread_wakeup((event_t) map);
2265
2266 return KERN_SUCCESS;
2267 }
2268
2269 /*
2270 * vm_map_remove:
2271 *
2272 * Remove the given address range from the target map.
2273 * This is the exported form of vm_map_delete.
2274 */
2275 kern_return_t vm_map_remove(
2276 register vm_map_t map,
2277 register vm_offset_t start,
2278 register vm_offset_t end)
2279 {
2280 register kern_return_t result;
2281
2282 vm_map_lock(map);
2283 VM_MAP_RANGE_CHECK(map, start, end);
2284 result = vm_map_delete(map, start, end);
2285 vm_map_unlock(map);
2286
2287 return result;
2288 }
2289
2290
2291 /*
2292 * vm_map_copy_steal_pages:
2293 *
2294 * Steal all the pages from a vm_map_copy page_list by copying ones
2295 * that have not already been stolen.
2296 */
2297 void
2298 vm_map_copy_steal_pages(
2299 vm_map_copy_t copy)
2300 {
2301 register vm_page_t m, new_m;
2302 register int i;
2303 vm_object_t object;
2304
2305 for (i = 0; i < copy->cpy_npages; i++) {
2306
2307 /*
2308 * If the page is not tabled, then it's already stolen.
2309 */
2310 m = copy->cpy_page_list[i];
2311 if (!m->tabled)
2312 continue;
2313
2314 /*
2315 * Page was not stolen, get a new
2316 * one and do the copy now.
2317 */
2318 while ((new_m = vm_page_grab()) == VM_PAGE_NULL) {
2319 VM_PAGE_WAIT(CONTINUE_NULL);
2320 }
2321
2322 vm_page_copy(m, new_m);
2323
2324 object = m->object;
2325 vm_object_lock(object);
2326 vm_page_lock_queues();
2327 if (!m->active && !m->inactive)
2328 vm_page_activate(m);
2329 vm_page_unlock_queues();
2330 PAGE_WAKEUP_DONE(m);
2331 vm_object_paging_end(object);
2332 vm_object_unlock(object);
2333
2334 copy->cpy_page_list[i] = new_m;
2335 }
2336 }
2337
2338 /*
2339 * vm_map_copy_page_discard:
2340 *
2341 * Get rid of the pages in a page_list copy. If the pages are
2342 * stolen, they are freed. If the pages are not stolen, they
2343 * are unbusied, and associated state is cleaned up.
2344 */
2345 void vm_map_copy_page_discard(
2346 vm_map_copy_t copy)
2347 {
2348 while (copy->cpy_npages > 0) {
2349 vm_page_t m;
2350
2351 if((m = copy->cpy_page_list[--(copy->cpy_npages)]) !=
2352 VM_PAGE_NULL) {
2353
2354 /*
2355 * If it's not in the table, then it's
2356 * a stolen page that goes back
2357 * to the free list. Else it belongs
2358 * to some object, and we hold a
2359 * paging reference on that object.
2360 */
2361 if (!m->tabled) {
2362 VM_PAGE_FREE(m);
2363 }
2364 else {
2365 vm_object_t object;
2366
2367 object = m->object;
2368
2369 vm_object_lock(object);
2370 vm_page_lock_queues();
2371 if (!m->active && !m->inactive)
2372 vm_page_activate(m);
2373 vm_page_unlock_queues();
2374
2375 PAGE_WAKEUP_DONE(m);
2376 vm_object_paging_end(object);
2377 vm_object_unlock(object);
2378 }
2379 }
2380 }
2381 }
2382
2383 /*
2384 * Routine: vm_map_copy_discard
2385 *
2386 * Description:
2387 * Dispose of a map copy object (returned by
2388 * vm_map_copyin).
2389 */
2390 void
2391 vm_map_copy_discard(
2392 vm_map_copy_t copy)
2393 {
2394 free_next_copy:
2395 if (copy == VM_MAP_COPY_NULL)
2396 return;
2397
2398 switch (copy->type) {
2399 case VM_MAP_COPY_ENTRY_LIST:
2400 while (vm_map_copy_first_entry(copy) !=
2401 vm_map_copy_to_entry(copy)) {
2402 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
2403
2404 vm_map_copy_entry_unlink(copy, entry);
2405 vm_object_deallocate(entry->object.vm_object);
2406 vm_map_copy_entry_dispose(copy, entry);
2407 }
2408 break;
2409 case VM_MAP_COPY_OBJECT:
2410 vm_object_deallocate(copy->cpy_object);
2411 break;
2412 case VM_MAP_COPY_PAGE_LIST:
2413
2414 /*
2415 * To clean this up, we have to unbusy all the pages
2416 * and release the paging references in their objects.
2417 */
2418 if (copy->cpy_npages > 0)
2419 vm_map_copy_page_discard(copy);
2420
2421 /*
2422 * If there's a continuation, abort it. The
2423 * abort routine releases any storage.
2424 */
2425 if (vm_map_copy_has_cont(copy)) {
2426
2427 /*
2428 * Special case: recognize
2429 * vm_map_copy_discard_cont and optimize
2430 * here to avoid tail recursion.
2431 */
2432 if (copy->cpy_cont == vm_map_copy_discard_cont) {
2433 register vm_map_copy_t new_copy;
2434
2435 new_copy = (vm_map_copy_t) copy->cpy_cont_args;
2436 zfree(vm_map_copy_zone, (vm_offset_t) copy);
2437 copy = new_copy;
2438 goto free_next_copy;
2439 }
2440 else {
2441 vm_map_copy_abort_cont(copy);
2442 }
2443 }
2444
2445 break;
2446 }
2447 zfree(vm_map_copy_zone, (vm_offset_t) copy);
2448 }
2449
2450 /*
2451 * Routine: vm_map_copy_copy
2452 *
2453 * Description:
2454 * Move the information in a map copy object to
2455 * a new map copy object, leaving the old one
2456 * empty.
2457 *
2458 * This is used by kernel routines that need
2459 * to look at out-of-line data (in copyin form)
2460 * before deciding whether to return SUCCESS.
2461 * If the routine returns FAILURE, the original
2462 * copy object will be deallocated; therefore,
2463 * these routines must make a copy of the copy
2464 * object and leave the original empty so that
2465 * deallocation will not fail.
2466 */
2467 vm_map_copy_t
2468 vm_map_copy_copy(
2469 vm_map_copy_t copy)
2470 {
2471 vm_map_copy_t new_copy;
2472
2473 if (copy == VM_MAP_COPY_NULL)
2474 return VM_MAP_COPY_NULL;
2475
2476 /*
2477 * Allocate a new copy object, and copy the information
2478 * from the old one into it.
2479 */
2480
2481 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
2482 *new_copy = *copy;
2483
2484 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
2485 /*
2486 * The links in the entry chain must be
2487 * changed to point to the new copy object.
2488 */
2489 vm_map_copy_first_entry(copy)->vme_prev
2490 = vm_map_copy_to_entry(new_copy);
2491 vm_map_copy_last_entry(copy)->vme_next
2492 = vm_map_copy_to_entry(new_copy);
2493 }
2494
2495 /*
2496 * Change the old copy object into one that contains
2497 * nothing to be deallocated.
2498 */
2499 copy->type = VM_MAP_COPY_OBJECT;
2500 copy->cpy_object = VM_OBJECT_NULL;
2501
2502 /*
2503 * Return the new object.
2504 */
2505 return new_copy;
2506 }
2507
2508 /*
2509 * Routine: vm_map_copy_discard_cont
2510 *
2511 * Description:
2512 * A version of vm_map_copy_discard that can be called
2513 * as a continuation from a vm_map_copy page list.
2514 */
2515 kern_return_t vm_map_copy_discard_cont(
2516 void * cont_args,
2517 vm_map_copy_t *copy_result) /* OUT */
2518 {
2519 vm_map_copy_discard((vm_map_copy_t) cont_args);
2520 if (copy_result != (vm_map_copy_t *)0)
2521 *copy_result = VM_MAP_COPY_NULL;
2522 return KERN_SUCCESS;
2523 }
2524
2525 /*
2526 * Routine: vm_map_copy_overwrite
2527 *
2528 * Description:
2529 * Copy the memory described by the map copy
2530 * object (copy; returned by vm_map_copyin) onto
2531 * the specified destination region (dst_map, dst_addr).
2532 * The destination must be writeable.
2533 *
2534 * Unlike vm_map_copyout, this routine actually
2535 * writes over previously-mapped memory. If the
2536 * previous mapping was to a permanent (user-supplied)
2537 * memory object, it is preserved.
2538 *
2539 * The attributes (protection and inheritance) of the
2540 * destination region are preserved.
2541 *
2542 * If successful, consumes the copy object.
2543 * Otherwise, the caller is responsible for it.
2544 *
2545 * Implementation notes:
2546 * To overwrite temporary virtual memory, it is
2547 * sufficient to remove the previous mapping and insert
2548 * the new copy. This replacement is done either on
2549 * the whole region (if no permanent virtual memory
2550 * objects are embedded in the destination region) or
2551 * in individual map entries.
2552 *
2553 * To overwrite permanent virtual memory, it is
2554 * necessary to copy each page, as the external
2555 * memory management interface currently does not
2556 * provide any optimizations.
2557 *
2558 * Once a page of permanent memory has been overwritten,
2559 * it is impossible to interrupt this function; otherwise,
2560 * the call would be neither atomic nor location-independent.
2561 * The kernel-state portion of a user thread must be
2562 * interruptible.
2563 *
2564 * It may be expensive to forward all requests that might
2565 * overwrite permanent memory (vm_write, vm_copy) to
2566 * uninterruptible kernel threads. This routine may be
2567 * called by interruptible threads; however, success is
2568 * not guaranteed -- if the request cannot be performed
2569 * atomically and interruptibly, an error indication is
2570 * returned.
2571 */
2572 kern_return_t vm_map_copy_overwrite(
2573 vm_map_t dst_map,
2574 vm_offset_t dst_addr,
2575 vm_map_copy_t copy,
2576 boolean_t interruptible)
2577 {
2578 vm_size_t size;
2579 vm_offset_t start;
2580 vm_map_entry_t tmp_entry;
2581 vm_map_entry_t entry;
2582
2583 boolean_t contains_permanent_objects = FALSE;
2584
2585 interruptible = FALSE; /* XXX */
2586
2587 /*
2588 * Check for null copy object.
2589 */
2590
2591 if (copy == VM_MAP_COPY_NULL)
2592 return KERN_SUCCESS;
2593
2594 /*
2595 * Only works for entry lists at the moment. Will
2596 * support page lists LATER.
2597 */
2598
2599 #if NORMA_IPC
2600 vm_map_convert_from_page_list(copy);
2601 #else
2602 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
2603 #endif
2604
2605 /*
2606 * Currently this routine only handles page-aligned
2607 * regions. Eventually, it should handle misalignments
2608 * by actually copying pages.
2609 */
2610
2611 if (!page_aligned(copy->offset) ||
2612 !page_aligned(copy->size) ||
2613 !page_aligned(dst_addr))
2614 return KERN_INVALID_ARGUMENT;
2615
2616 size = copy->size;
2617
2618 if (size == 0) {
2619 vm_map_copy_discard(copy);
2620 return KERN_SUCCESS;
2621 }
2622
2623 /*
2624 * Verify that the destination is all writeable
2625 * initially.
2626 */
2627 start_pass_1:
2628 vm_map_lock(dst_map);
2629 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
2630 vm_map_unlock(dst_map);
2631 return KERN_INVALID_ADDRESS;
2632 }
2633 vm_map_clip_start(dst_map, tmp_entry, dst_addr);
2634 for (entry = tmp_entry;;) {
2635 vm_size_t sub_size = (entry->vme_end - entry->vme_start);
2636 vm_map_entry_t next = entry->vme_next;
2637
2638 if ( ! (entry->protection & VM_PROT_WRITE)) {
2639 vm_map_unlock(dst_map);
2640 return KERN_PROTECTION_FAILURE;
2641 }
2642
2643 /*
2644 * If the entry is in transition, we must wait
2645 * for it to exit that state. Anything could happen
2646 * when we unlock the map, so start over.
2647 */
2648 if (entry->in_transition) {
2649
2650 /*
2651 * Say that we are waiting, and wait for entry.
2652 */
2653 entry->needs_wakeup = TRUE;
2654 vm_map_entry_wait(dst_map, FALSE);
2655
2656 goto start_pass_1;
2657 }
2658
2659 if (size <= sub_size)
2660 break;
2661
2662 if ((next == vm_map_to_entry(dst_map)) ||
2663 (next->vme_start != entry->vme_end)) {
2664 vm_map_unlock(dst_map);
2665 return KERN_INVALID_ADDRESS;
2666 }
2667
2668
2669 /*
2670 * Check for permanent objects in the destination.
2671 */
2672
2673 if ((entry->object.vm_object != VM_OBJECT_NULL) &&
2674 !entry->object.vm_object->temporary)
2675 contains_permanent_objects = TRUE;
2676
2677 size -= sub_size;
2678 entry = next;
2679 }
2680
2681 /*
2682 * If there are permanent objects in the destination, then
2683 * the copy cannot be interrupted.
2684 */
2685
2686 if (interruptible && contains_permanent_objects)
2687 return KERN_FAILURE ; /* XXX */
2688
2689 /*
2690 * XXXO If there are no permanent objects in the destination,
2691 * XXXO and the source and destination map entry zones match,
2692 * XXXO and the destination map entry is not shared,
2693 * XXXO then the map entries can be deleted and replaced
2694 * XXXO with those from the copy. The following code is the
2695 * XXXO basic idea of what to do, but there are lots of annoying
2696 * XXXO little details about getting protection and inheritance
2697 * XXXO right. Should add protection, inheritance, and sharing checks
2698 * XXXO to the above pass and make sure that no wiring is involved.
2699 */
2700 /*
2701 * if (!contains_permanent_objects &&
2702 * copy->cpy_hdr.entries_pageable == dst_map->hdr.entries_pageable) {
2703 *
2704 * *
2705 * * Run over copy and adjust entries. Steal code
2706 * * from vm_map_copyout() to do this.
2707 * *
2708 *
2709 * tmp_entry = tmp_entry->vme_prev;
2710 * vm_map_delete(dst_map, dst_addr, dst_addr + copy->size);
2711 * vm_map_copy_insert(dst_map, tmp_entry, copy);
2712 *
2713 * vm_map_unlock(dst_map);
2714 * vm_map_copy_discard(copy);
2715 * }
2716 */
2717 /*
2718 *
2719 * Make a second pass, overwriting the data
2720 * At the beginning of each loop iteration,
2721 * the next entry to be overwritten is "tmp_entry"
2722 * (initially, the value returned from the lookup above),
2723 * and the starting address expected in that entry
2724 * is "start".
2725 */
2726
2727 start = dst_addr;
2728
2729 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
2730 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy);
2731 vm_size_t copy_size = (copy_entry->vme_end - copy_entry->vme_start);
2732 vm_object_t object;
2733
2734 entry = tmp_entry;
2735 size = (entry->vme_end - entry->vme_start);
2736 /*
2737 * Make sure that no holes popped up in the
2738 * address map, and that the protection is
2739 * still valid, in case the map was unlocked
2740 * earlier.
2741 */
2742
2743 if (entry->vme_start != start) {
2744 vm_map_unlock(dst_map);
2745 return KERN_INVALID_ADDRESS;
2746 }
2747 assert(entry != vm_map_to_entry(dst_map));
2748
2749 /*
2750 * Check protection again
2751 */
2752
2753 if ( ! (entry->protection & VM_PROT_WRITE)) {
2754 vm_map_unlock(dst_map);
2755 return KERN_PROTECTION_FAILURE;
2756 }
2757
2758 /*
2759 * Adjust to source size first
2760 */
2761
2762 if (copy_size < size) {
2763 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
2764 size = copy_size;
2765 }
2766
2767 /*
2768 * Adjust to destination size
2769 */
2770
2771 if (size < copy_size) {
2772 vm_map_copy_clip_end(copy, copy_entry,
2773 copy_entry->vme_start + size);
2774 copy_size = size;
2775 }
2776
2777 assert((entry->vme_end - entry->vme_start) == size);
2778 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
2779 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
2780
2781 /*
2782 * If the destination contains temporary unshared memory,
2783 * we can perform the copy by throwing it away and
2784 * installing the source data.
2785 */
2786
2787 object = entry->object.vm_object;
2788 if (!entry->is_shared &&
2789 ((object == VM_OBJECT_NULL) || object->temporary)) {
2790 vm_object_t old_object = entry->object.vm_object;
2791 vm_offset_t old_offset = entry->offset;
2792
2793 entry->object = copy_entry->object;
2794 entry->offset = copy_entry->offset;
2795 entry->needs_copy = copy_entry->needs_copy;
2796 entry->wired_count = 0;
2797 entry->user_wired_count = 0;
2798
2799 vm_map_copy_entry_unlink(copy, copy_entry);
2800 vm_map_copy_entry_dispose(copy, copy_entry);
2801
2802 vm_object_pmap_protect(
2803 old_object,
2804 old_offset,
2805 size,
2806 dst_map->pmap,
2807 tmp_entry->vme_start,
2808 VM_PROT_NONE);
2809
2810 vm_object_deallocate(old_object);
2811
2812 /*
2813 * Set up for the next iteration. The map
2814 * has not been unlocked, so the next
2815 * address should be at the end of this
2816 * entry, and the next map entry should be
2817 * the one following it.
2818 */
2819
2820 start = tmp_entry->vme_end;
2821 tmp_entry = tmp_entry->vme_next;
2822 } else {
2823 vm_map_version_t version;
2824 vm_object_t dst_object = entry->object.vm_object;
2825 vm_offset_t dst_offset = entry->offset;
2826 kern_return_t r;
2827
2828 /*
2829 * Take an object reference, and record
2830 * the map version information so that the
2831 * map can be safely unlocked.
2832 */
2833
2834 vm_object_reference(dst_object);
2835
2836 version.main_timestamp = dst_map->timestamp;
2837
2838 vm_map_unlock(dst_map);
2839
2840 /*
2841 * Copy as much as possible in one pass
2842 */
2843
2844 copy_size = size;
2845 r = vm_fault_copy(
2846 copy_entry->object.vm_object,
2847 copy_entry->offset,
2848 ©_size,
2849 dst_object,
2850 dst_offset,
2851 dst_map,
2852 &version,
2853 FALSE /* XXX interruptible */ );
2854
2855 /*
2856 * Release the object reference
2857 */
2858
2859 vm_object_deallocate(dst_object);
2860
2861 /*
2862 * If a hard error occurred, return it now
2863 */
2864
2865 if (r != KERN_SUCCESS)
2866 return r;
2867
2868 if (copy_size != 0) {
2869 /*
2870 * Dispose of the copied region
2871 */
2872
2873 vm_map_copy_clip_end(copy, copy_entry,
2874 copy_entry->vme_start + copy_size);
2875 vm_map_copy_entry_unlink(copy, copy_entry);
2876 vm_object_deallocate(copy_entry->object.vm_object);
2877 vm_map_copy_entry_dispose(copy, copy_entry);
2878 }
2879
2880 /*
2881 * Pick up in the destination map where we left off.
2882 *
2883 * Use the version information to avoid a lookup
2884 * in the normal case.
2885 */
2886
2887 start += copy_size;
2888 vm_map_lock(dst_map);
2889 if ((version.main_timestamp + 1) == dst_map->timestamp) {
2890 /* We can safely use saved tmp_entry value */
2891
2892 vm_map_clip_end(dst_map, tmp_entry, start);
2893 tmp_entry = tmp_entry->vme_next;
2894 } else {
2895 /* Must do lookup of tmp_entry */
2896
2897 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
2898 vm_map_unlock(dst_map);
2899 return KERN_INVALID_ADDRESS;
2900 }
2901 vm_map_clip_start(dst_map, tmp_entry, start);
2902 }
2903 }
2904
2905 }
2906 vm_map_unlock(dst_map);
2907
2908 /*
2909 * Throw away the vm_map_copy object
2910 */
2911 vm_map_copy_discard(copy);
2912
2913 return KERN_SUCCESS;
2914 }
2915
2916 /*
2917 * Macro: vm_map_copy_insert
2918 *
2919 * Description:
2920 * Link a copy chain ("copy") into a map at the
2921 * specified location (after "where").
2922 * Side effects:
2923 * The copy chain is destroyed.
2924 * Warning:
2925 * The arguments are evaluated multiple times.
2926 */
2927 #define vm_map_copy_insert(map, where, copy) \
2928 MACRO_BEGIN \
2929 (((where)->vme_next)->vme_prev = vm_map_copy_last_entry(copy)) \
2930 ->vme_next = ((where)->vme_next); \
2931 ((where)->vme_next = vm_map_copy_first_entry(copy)) \
2932 ->vme_prev = (where); \
2933 (map)->hdr.nentries += (copy)->cpy_hdr.nentries; \
2934 zfree(vm_map_copy_zone, (vm_offset_t) copy); \
2935 MACRO_END
2936
2937 kern_return_t vm_map_copyout_page_list(
2938 vm_map_t dst_map,
2939 vm_offset_t *dst_addr, /* OUT */
2940 vm_map_copy_t copy); /* forward */
2941
2942 /*
2943 * Routine: vm_map_copyout
2944 *
2945 * Description:
2946 * Copy out a copy chain ("copy") into newly-allocated
2947 * space in the destination map.
2948 *
2949 * If successful, consumes the copy object.
2950 * Otherwise, the caller is responsible for it.
2951 */
2952 kern_return_t vm_map_copyout(
2953 register
2954 vm_map_t dst_map,
2955 vm_offset_t *dst_addr, /* OUT */
2956 register
2957 vm_map_copy_t copy)
2958 {
2959 vm_size_t size;
2960 vm_size_t adjustment;
2961 vm_offset_t start;
2962 vm_offset_t vm_copy_start;
2963 vm_map_entry_t last;
2964 register
2965 vm_map_entry_t entry;
2966
2967 /*
2968 * Check for null copy object.
2969 */
2970
2971 if (copy == VM_MAP_COPY_NULL) {
2972 *dst_addr = 0;
2973 return KERN_SUCCESS;
2974 }
2975
2976 /*
2977 * Check for special copy object, created
2978 * by vm_map_copyin_object.
2979 */
2980
2981 if (copy->type == VM_MAP_COPY_OBJECT) {
2982 vm_object_t object = copy->cpy_object;
2983 vm_size_t offset = copy->offset;
2984 vm_size_t tmp_size = copy->size;
2985 kern_return_t kr;
2986
2987 *dst_addr = 0;
2988 kr = vm_map_enter(dst_map, dst_addr, tmp_size,
2989 (vm_offset_t) 0, TRUE,
2990 object, offset, FALSE,
2991 VM_PROT_DEFAULT, VM_PROT_ALL,
2992 VM_INHERIT_DEFAULT);
2993 if (kr != KERN_SUCCESS)
2994 return kr;
2995 zfree(vm_map_copy_zone, (vm_offset_t) copy);
2996 return KERN_SUCCESS;
2997 }
2998
2999 if (copy->type == VM_MAP_COPY_PAGE_LIST)
3000 return vm_map_copyout_page_list(dst_map, dst_addr, copy);
3001
3002 /*
3003 * Find space for the data
3004 */
3005
3006 vm_copy_start = trunc_page(copy->offset);
3007 size = round_page(copy->offset + copy->size) - vm_copy_start;
3008
3009 StartAgain: ;
3010
3011 vm_map_lock(dst_map);
3012 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
3013 vm_map_min(dst_map) : last->vme_end;
3014
3015 while (TRUE) {
3016 vm_map_entry_t next = last->vme_next;
3017 vm_offset_t end = start + size;
3018
3019 if ((end > dst_map->max_offset) || (end < start)) {
3020 if (dst_map->wait_for_space) {
3021 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
3022 assert_wait((event_t) dst_map, TRUE);
3023 vm_map_unlock(dst_map);
3024 thread_block(CONTINUE_NULL);
3025 goto StartAgain;
3026 }
3027 }
3028 vm_map_unlock(dst_map);
3029 return KERN_NO_SPACE;
3030 }
3031
3032 if ((next == vm_map_to_entry(dst_map)) ||
3033 (next->vme_start >= end))
3034 break;
3035
3036 last = next;
3037 start = last->vme_end;
3038 }
3039
3040 /*
3041 * Since we're going to just drop the map
3042 * entries from the copy into the destination
3043 * map, they must come from the same pool.
3044 */
3045
3046 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
3047 /*
3048 * Mismatches occur when dealing with the default
3049 * pager.
3050 */
3051 zone_t old_zone;
3052 vm_map_entry_t next, new;
3053
3054 /*
3055 * Find the zone that the copies were allocated from
3056 */
3057 old_zone = (copy->cpy_hdr.entries_pageable)
3058 ? vm_map_entry_zone
3059 : vm_map_kentry_zone;
3060 entry = vm_map_copy_first_entry(copy);
3061
3062 /*
3063 * Reinitialize the copy so that vm_map_copy_entry_link
3064 * will work.
3065 */
3066 copy->cpy_hdr.nentries = 0;
3067 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
3068 vm_map_copy_first_entry(copy) =
3069 vm_map_copy_last_entry(copy) =
3070 vm_map_copy_to_entry(copy);
3071
3072 /*
3073 * Copy each entry.
3074 */
3075 while (entry != vm_map_copy_to_entry(copy)) {
3076 new = vm_map_copy_entry_create(copy);
3077 vm_map_entry_copy_full(new, entry);
3078 vm_map_copy_entry_link(copy,
3079 vm_map_copy_last_entry(copy),
3080 new);
3081 next = entry->vme_next;
3082 zfree(old_zone, (vm_offset_t) entry);
3083 entry = next;
3084 }
3085 }
3086
3087 /*
3088 * Adjust the addresses in the copy chain, and
3089 * reset the region attributes.
3090 */
3091
3092 adjustment = start - vm_copy_start;
3093 for (entry = vm_map_copy_first_entry(copy);
3094 entry != vm_map_copy_to_entry(copy);
3095 entry = entry->vme_next) {
3096 entry->vme_start += adjustment;
3097 entry->vme_end += adjustment;
3098
3099 entry->inheritance = VM_INHERIT_DEFAULT;
3100 entry->protection = VM_PROT_DEFAULT;
3101 entry->max_protection = VM_PROT_ALL;
3102 #if NET_ATM
3103 entry->projected_on = 0;
3104 #endif /* NET_ATM */
3105
3106 /*
3107 * If the entry is now wired,
3108 * map the pages into the destination map.
3109 */
3110 if (entry->wired_count != 0) {
3111 register vm_offset_t va;
3112 vm_offset_t offset;
3113 register vm_object_t object;
3114
3115 object = entry->object.vm_object;
3116 offset = entry->offset;
3117 va = entry->vme_start;
3118
3119 pmap_pageable(dst_map->pmap,
3120 entry->vme_start,
3121 entry->vme_end,
3122 TRUE);
3123
3124 while (va < entry->vme_end) {
3125 register vm_page_t m;
3126
3127 /*
3128 * Look up the page in the object.
3129 * Assert that the page will be found in the
3130 * top object:
3131 * either
3132 * the object was newly created by
3133 * vm_object_copy_slowly, and has
3134 * copies of all of the pages from
3135 * the source object
3136 * or
3137 * the object was moved from the old
3138 * map entry; because the old map
3139 * entry was wired, all of the pages
3140 * were in the top-level object.
3141 * (XXX not true if we wire pages for
3142 * reading)
3143 */
3144 vm_object_lock(object);
3145 vm_object_paging_begin(object);
3146
3147 m = vm_page_lookup(object, offset);
3148 if (m == VM_PAGE_NULL || m->wire_count == 0 ||
3149 m->absent)
3150 panic("vm_map_copyout: wiring 0x%x", m);
3151
3152 m->busy = TRUE;
3153 vm_object_unlock(object);
3154
3155 PMAP_ENTER(dst_map->pmap, va, m,
3156 entry->protection, TRUE);
3157
3158 vm_object_lock(object);
3159 PAGE_WAKEUP_DONE(m);
3160 /* the page is wired, so we don't have to activate */
3161 vm_object_paging_end(object);
3162 vm_object_unlock(object);
3163
3164 offset += PAGE_SIZE;
3165 va += PAGE_SIZE;
3166 }
3167 }
3168
3169
3170 }
3171
3172 /*
3173 * Correct the page alignment for the result
3174 */
3175
3176 *dst_addr = start + (copy->offset - vm_copy_start);
3177
3178 /*
3179 * Update the hints and the map size
3180 */
3181
3182 if (dst_map->first_free == last)
3183 dst_map->first_free = vm_map_copy_last_entry(copy);
3184 SAVE_HINT(dst_map, vm_map_copy_last_entry(copy));
3185
3186 dst_map->size += size;
3187
3188 /*
3189 * Link in the copy
3190 */
3191
3192 vm_map_copy_insert(dst_map, last, copy);
3193
3194 vm_map_unlock(dst_map);
3195
3196 /*
3197 * XXX If wiring_required, call vm_map_pageable
3198 */
3199
3200 return KERN_SUCCESS;
3201 }
3202
3203 /*
3204 *
3205 * vm_map_copyout_page_list:
3206 *
3207 * Version of vm_map_copyout() for page list vm map copies.
3208 *
3209 */
3210 kern_return_t vm_map_copyout_page_list(
3211 register
3212 vm_map_t dst_map,
3213 vm_offset_t *dst_addr, /* OUT */
3214 register
3215 vm_map_copy_t copy)
3216 {
3217 vm_size_t size;
3218 vm_offset_t start;
3219 vm_offset_t end;
3220 vm_offset_t offset;
3221 vm_map_entry_t last;
3222 register
3223 vm_object_t object;
3224 vm_page_t *page_list, m;
3225 vm_map_entry_t entry;
3226 vm_offset_t old_last_offset;
3227 boolean_t cont_invoked, needs_wakeup = FALSE;
3228 kern_return_t result = KERN_SUCCESS;
3229 vm_map_copy_t orig_copy;
3230 vm_offset_t dst_offset;
3231 boolean_t must_wire;
3232
3233 /*
3234 * Make sure the pages are stolen, because we are
3235 * going to put them in a new object. Assume that
3236 * all pages are identical to first in this regard.
3237 */
3238
3239 page_list = ©->cpy_page_list[0];
3240 if ((*page_list)->tabled)
3241 vm_map_copy_steal_pages(copy);
3242
3243 /*
3244 * Find space for the data
3245 */
3246
3247 size = round_page(copy->offset + copy->size) -
3248 trunc_page(copy->offset);
3249 StartAgain:
3250 vm_map_lock(dst_map);
3251 must_wire = dst_map->wiring_required;
3252
3253 last = dst_map->first_free;
3254 if (last == vm_map_to_entry(dst_map)) {
3255 start = vm_map_min(dst_map);
3256 } else {
3257 start = last->vme_end;
3258 }
3259
3260 while (TRUE) {
3261 vm_map_entry_t next = last->vme_next;
3262 end = start + size;
3263
3264 if ((end > dst_map->max_offset) || (end < start)) {
3265 if (dst_map->wait_for_space) {
3266 if (size <= (dst_map->max_offset -
3267 dst_map->min_offset)) {
3268 assert_wait((event_t) dst_map, TRUE);
3269 vm_map_unlock(dst_map);
3270 thread_block(CONTINUE_NULL);
3271 goto StartAgain;
3272 }
3273 }
3274 vm_map_unlock(dst_map);
3275 return KERN_NO_SPACE;
3276 }
3277
3278 if ((next == vm_map_to_entry(dst_map)) ||
3279 (next->vme_start >= end)) {
3280 break;
3281 }
3282
3283 last = next;
3284 start = last->vme_end;
3285 }
3286
3287 /*
3288 * See whether we can avoid creating a new entry (and object) by
3289 * extending one of our neighbors. [So far, we only attempt to
3290 * extend from below.]
3291 *
3292 * The code path below here is a bit twisted. If any of the
3293 * extension checks fails, we branch to create_object. If
3294 * it all works, we fall out the bottom and goto insert_pages.
3295 */
3296 if (last == vm_map_to_entry(dst_map) ||
3297 last->vme_end != start ||
3298 last->is_shared != FALSE ||
3299 last->is_sub_map != FALSE ||
3300 last->inheritance != VM_INHERIT_DEFAULT ||
3301 last->protection != VM_PROT_DEFAULT ||
3302 last->max_protection != VM_PROT_ALL ||
3303 (must_wire ? (last->wired_count != 1 ||
3304 last->user_wired_count != 1) :
3305 (last->wired_count != 0))) {
3306 goto create_object;
3307 }
3308
3309 /*
3310 * If this entry needs an object, make one.
3311 */
3312 if (last->object.vm_object == VM_OBJECT_NULL) {
3313 object = vm_object_allocate(
3314 (vm_size_t)(last->vme_end - last->vme_start + size));
3315 last->object.vm_object = object;
3316 last->offset = 0;
3317 vm_object_lock(object);
3318 }
3319 else {
3320 vm_offset_t prev_offset = last->offset;
3321 vm_size_t prev_size = start - last->vme_start;
3322 vm_size_t new_size;
3323
3324 /*
3325 * This is basically vm_object_coalesce.
3326 */
3327
3328 object = last->object.vm_object;
3329 vm_object_lock(object);
3330
3331 /*
3332 * Try to collapse the object first
3333 */
3334 vm_object_collapse(object);
3335
3336 /*
3337 * Can't coalesce if pages not mapped to
3338 * last may be in use anyway:
3339 * . more than one reference
3340 * . paged out
3341 * . shadows another object
3342 * . has a copy elsewhere
3343 * . paging references (pages might be in page-list)
3344 */
3345
3346 if ((object->ref_count > 1) ||
3347 object->pager_created ||
3348 (object->shadow != VM_OBJECT_NULL) ||
3349 (object->copy != VM_OBJECT_NULL) ||
3350 (object->paging_in_progress != 0)) {
3351 vm_object_unlock(object);
3352 goto create_object;
3353 }
3354
3355 /*
3356 * Extend the object if necessary. Don't have to call
3357 * vm_object_page_remove because the pages aren't mapped,
3358 * and vm_page_replace will free up any old ones it encounters.
3359 */
3360 new_size = prev_offset + prev_size + size;
3361 if (new_size > object->size)
3362 object->size = new_size;
3363 }
3364
3365 /*
3366 * Coalesced the two objects - can extend
3367 * the previous map entry to include the
3368 * new range.
3369 */
3370 dst_map->size += size;
3371 last->vme_end = end;
3372
3373 SAVE_HINT(dst_map, last);
3374
3375 goto insert_pages;
3376
3377 create_object:
3378
3379 /*
3380 * Create object
3381 */
3382 object = vm_object_allocate(size);
3383
3384 /*
3385 * Create entry
3386 */
3387
3388 entry = vm_map_entry_create(dst_map);
3389
3390 entry->object.vm_object = object;
3391 entry->offset = 0;
3392
3393 entry->is_shared = FALSE;
3394 entry->is_sub_map = FALSE;
3395 entry->needs_copy = FALSE;
3396
3397 if (must_wire) {
3398 entry->wired_count = 1;
3399 entry->user_wired_count = 1;
3400 } else {
3401 entry->wired_count = 0;
3402 entry->user_wired_count = 0;
3403 }
3404
3405 entry->in_transition = TRUE;
3406 entry->needs_wakeup = FALSE;
3407
3408 entry->vme_start = start;
3409 entry->vme_end = start + size;
3410
3411 entry->inheritance = VM_INHERIT_DEFAULT;
3412 entry->protection = VM_PROT_DEFAULT;
3413 entry->max_protection = VM_PROT_ALL;
3414 #if NET_ATM
3415 entry->projected_on = 0;
3416 #endif /* NET_ATM */
3417
3418 vm_object_lock(object);
3419
3420 /*
3421 * Update the hints and the map size
3422 */
3423 if (dst_map->first_free == last) {
3424 dst_map->first_free = entry;
3425 }
3426 SAVE_HINT(dst_map, entry);
3427 dst_map->size += size;
3428
3429 /*
3430 * Link in the entry
3431 */
3432 vm_map_entry_link(dst_map, last, entry);
3433 last = entry;
3434
3435 /*
3436 * Transfer pages into new object.
3437 * Scan page list in vm_map_copy.
3438 */
3439 insert_pages:
3440 dst_offset = copy->offset & page_mask;
3441 cont_invoked = FALSE;
3442 orig_copy = copy;
3443 last->in_transition = TRUE;
3444 old_last_offset = last->offset
3445 + (start - last->vme_start);
3446
3447 vm_page_lock_queues();
3448
3449 for (offset = 0; offset < size; offset += PAGE_SIZE) {
3450 m = *page_list;
3451 assert(m && !m->tabled);
3452
3453 /*
3454 * Must clear busy bit in page before inserting it.
3455 * Ok to skip wakeup logic because nobody else
3456 * can possibly know about this page.
3457 * The page is dirty in its new object.
3458 */
3459
3460 assert(!m->wanted);
3461 m->busy = FALSE;
3462 m->dirty = TRUE;
3463 vm_page_replace(m, object, old_last_offset + offset);
3464 if (must_wire)
3465 vm_page_wire(m);
3466 else
3467 vm_page_activate(m);
3468
3469 *page_list++ = VM_PAGE_NULL;
3470 if (--(copy->cpy_npages) == 0 &&
3471 vm_map_copy_has_cont(copy)) {
3472 vm_map_copy_t new_copy;
3473
3474 /*
3475 * Ok to unlock map because entry is
3476 * marked in_transition.
3477 */
3478 cont_invoked = TRUE;
3479 vm_page_unlock_queues();
3480 vm_object_unlock(object);
3481 vm_map_unlock(dst_map);
3482 vm_map_copy_invoke_cont(copy, &new_copy, &result);
3483
3484 if (result == KERN_SUCCESS) {
3485
3486 /*
3487 * If we got back a copy with real pages,
3488 * steal them now. Either all of the
3489 * pages in the list are tabled or none
3490 * of them are; mixtures are not possible.
3491 *
3492 * Save original copy for consume on
3493 * success logic at end of routine.
3494 */
3495 if (copy != orig_copy)
3496 vm_map_copy_discard(copy);
3497
3498 if ((copy = new_copy) != VM_MAP_COPY_NULL) {
3499 page_list = ©->cpy_page_list[0];
3500 if ((*page_list)->tabled)
3501 vm_map_copy_steal_pages(copy);
3502 }
3503 }
3504 else {
3505 /*
3506 * Continuation failed.
3507 */
3508 vm_map_lock(dst_map);
3509 goto error;
3510 }
3511
3512 vm_map_lock(dst_map);
3513 vm_object_lock(object);
3514 vm_page_lock_queues();
3515 }
3516 }
3517
3518 vm_page_unlock_queues();
3519 vm_object_unlock(object);
3520
3521 *dst_addr = start + dst_offset;
3522
3523 /*
3524 * Clear the in transition bits. This is easy if we
3525 * didn't have a continuation.
3526 */
3527 error:
3528 if (!cont_invoked) {
3529 /*
3530 * We didn't unlock the map, so nobody could
3531 * be waiting.
3532 */
3533 last->in_transition = FALSE;
3534 assert(!last->needs_wakeup);
3535 needs_wakeup = FALSE;
3536 }
3537 else {
3538 if (!vm_map_lookup_entry(dst_map, start, &entry))
3539 panic("vm_map_copyout_page_list: missing entry");
3540
3541 /*
3542 * Clear transition bit for all constituent entries that
3543 * were in the original entry. Also check for waiters.
3544 */
3545 while((entry != vm_map_to_entry(dst_map)) &&
3546 (entry->vme_start < end)) {
3547 assert(entry->in_transition);
3548 entry->in_transition = FALSE;
3549 if(entry->needs_wakeup) {
3550 entry->needs_wakeup = FALSE;
3551 needs_wakeup = TRUE;
3552 }
3553 entry = entry->vme_next;
3554 }
3555 }
3556
3557 if (result != KERN_SUCCESS)
3558 vm_map_delete(dst_map, start, end);
3559
3560 vm_map_unlock(dst_map);
3561
3562 if (needs_wakeup)
3563 vm_map_entry_wakeup(dst_map);
3564
3565 /*
3566 * Consume on success logic.
3567 */
3568 if (copy != orig_copy) {
3569 zfree(vm_map_copy_zone, (vm_offset_t) copy);
3570 }
3571 if (result == KERN_SUCCESS) {
3572 zfree(vm_map_copy_zone, (vm_offset_t) orig_copy);
3573 }
3574
3575 return result;
3576 }
3577
3578 /*
3579 * Routine: vm_map_copyin
3580 *
3581 * Description:
3582 * Copy the specified region (src_addr, len) from the
3583 * source address space (src_map), possibly removing
3584 * the region from the source address space (src_destroy).
3585 *
3586 * Returns:
3587 * A vm_map_copy_t object (copy_result), suitable for
3588 * insertion into another address space (using vm_map_copyout),
3589 * copying over another address space region (using
3590 * vm_map_copy_overwrite). If the copy is unused, it
3591 * should be destroyed (using vm_map_copy_discard).
3592 *
3593 * In/out conditions:
3594 * The source map should not be locked on entry.
3595 */
3596 kern_return_t vm_map_copyin(
3597 vm_map_t src_map,
3598 vm_offset_t src_addr,
3599 vm_size_t len,
3600 boolean_t src_destroy,
3601 vm_map_copy_t *copy_result) /* OUT */
3602 {
3603 vm_map_entry_t tmp_entry; /* Result of last map lookup --
3604 * in multi-level lookup, this
3605 * entry contains the actual
3606 * vm_object/offset.
3607 */
3608
3609 vm_offset_t src_start; /* Start of current entry --
3610 * where copy is taking place now
3611 */
3612 vm_offset_t src_end; /* End of entire region to be
3613 * copied */
3614
3615 register
3616 vm_map_copy_t copy; /* Resulting copy */
3617
3618 /*
3619 * Check for copies of zero bytes.
3620 */
3621
3622 if (len == 0) {
3623 *copy_result = VM_MAP_COPY_NULL;
3624 return KERN_SUCCESS;
3625 }
3626
3627 /*
3628 * Compute start and end of region
3629 */
3630
3631 src_start = trunc_page(src_addr);
3632 src_end = round_page(src_addr + len);
3633
3634 /*
3635 * Check that the end address doesn't overflow
3636 */
3637
3638 if (src_end <= src_start)
3639 if ((src_end < src_start) || (src_start != 0))
3640 return KERN_INVALID_ADDRESS;
3641
3642 /*
3643 * Allocate a header element for the list.
3644 *
3645 * Use the start and end in the header to
3646 * remember the endpoints prior to rounding.
3647 */
3648
3649 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
3650 vm_map_copy_first_entry(copy) =
3651 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
3652 copy->type = VM_MAP_COPY_ENTRY_LIST;
3653 copy->cpy_hdr.nentries = 0;
3654 copy->cpy_hdr.entries_pageable = TRUE;
3655
3656 copy->offset = src_addr;
3657 copy->size = len;
3658
3659 #define RETURN(x) \
3660 MACRO_BEGIN \
3661 vm_map_unlock(src_map); \
3662 vm_map_copy_discard(copy); \
3663 MACRO_RETURN(x); \
3664 MACRO_END
3665
3666 /*
3667 * Find the beginning of the region.
3668 */
3669
3670 vm_map_lock(src_map);
3671
3672 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
3673 RETURN(KERN_INVALID_ADDRESS);
3674 vm_map_clip_start(src_map, tmp_entry, src_start);
3675
3676 /*
3677 * Go through entries until we get to the end.
3678 */
3679
3680 while (TRUE) {
3681 register
3682 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
3683 vm_size_t src_size; /* Size of source
3684 * map entry (in both
3685 * maps)
3686 */
3687
3688 register
3689 vm_object_t src_object; /* Object to copy */
3690 vm_offset_t src_offset;
3691
3692 boolean_t src_needs_copy; /* Should source map
3693 * be made read-only
3694 * for copy-on-write?
3695 */
3696
3697 register
3698 vm_map_entry_t new_entry; /* Map entry for copy */
3699 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
3700
3701 boolean_t was_wired; /* Was source wired? */
3702 vm_map_version_t version; /* Version before locks
3703 * dropped to make copy
3704 */
3705
3706 /*
3707 * Verify that the region can be read.
3708 */
3709
3710 if (! (src_entry->protection & VM_PROT_READ))
3711 RETURN(KERN_PROTECTION_FAILURE);
3712
3713 /*
3714 * Clip against the endpoints of the entire region.
3715 */
3716
3717 vm_map_clip_end(src_map, src_entry, src_end);
3718
3719 src_size = src_entry->vme_end - src_start;
3720 src_object = src_entry->object.vm_object;
3721 src_offset = src_entry->offset;
3722 was_wired = (src_entry->wired_count != 0);
3723
3724 /*
3725 * Create a new address map entry to
3726 * hold the result. Fill in the fields from
3727 * the appropriate source entries.
3728 */
3729
3730 new_entry = vm_map_copy_entry_create(copy);
3731 vm_map_entry_copy(new_entry, src_entry);
3732
3733 /*
3734 * Attempt non-blocking copy-on-write optimizations.
3735 */
3736
3737 if (src_destroy &&
3738 (src_object == VM_OBJECT_NULL || src_object->temporary) &&
3739 !src_object->use_shared_copy)
3740 {
3741 /*
3742 * If we are destroying the source, and the object
3743 * is temporary, and not shared writable,
3744 * we can move the object reference
3745 * from the source to the copy. The copy is
3746 * copy-on-write only if the source is.
3747 * We make another reference to the object, because
3748 * destroying the source entry will deallocate it.
3749 */
3750 vm_object_reference(src_object);
3751
3752 /*
3753 * Copy is always unwired. vm_map_copy_entry
3754 * set its wired count to zero.
3755 */
3756
3757 goto CopySuccessful;
3758 }
3759
3760 if (!was_wired &&
3761 vm_object_copy_temporary(
3762 &new_entry->object.vm_object,
3763 &new_entry->offset,
3764 &src_needs_copy,
3765 &new_entry_needs_copy)) {
3766
3767 new_entry->needs_copy = new_entry_needs_copy;
3768
3769 /*
3770 * Handle copy-on-write obligations
3771 */
3772
3773 if (src_needs_copy && !tmp_entry->needs_copy) {
3774 vm_object_pmap_protect(
3775 src_object,
3776 src_offset,
3777 src_size,
3778 (src_entry->is_shared ? PMAP_NULL
3779 : src_map->pmap),
3780 src_entry->vme_start,
3781 src_entry->protection &
3782 ~VM_PROT_WRITE);
3783
3784 tmp_entry->needs_copy = TRUE;
3785 }
3786
3787 /*
3788 * The map has never been unlocked, so it's safe to
3789 * move to the next entry rather than doing another
3790 * lookup.
3791 */
3792
3793 goto CopySuccessful;
3794 }
3795
3796 new_entry->needs_copy = FALSE;
3797
3798 /*
3799 * Take an object reference, so that we may
3800 * release the map lock(s).
3801 */
3802
3803 assert(src_object != VM_OBJECT_NULL);
3804 vm_object_reference(src_object);
3805
3806 /*
3807 * Record the timestamp for later verification.
3808 * Unlock the map.
3809 */
3810
3811 version.main_timestamp = src_map->timestamp;
3812 vm_map_unlock(src_map);
3813
3814 /*
3815 * Perform the copy
3816 */
3817
3818 if (was_wired) {
3819 vm_object_lock(src_object);
3820 (void) vm_object_copy_slowly(
3821 src_object,
3822 src_offset,
3823 src_size,
3824 FALSE,
3825 &new_entry->object.vm_object);
3826 new_entry->offset = 0;
3827 new_entry->needs_copy = FALSE;
3828 } else {
3829 kern_return_t result;
3830
3831 result = vm_object_copy_strategically(src_object,
3832 src_offset,
3833 src_size,
3834 &new_entry->object.vm_object,
3835 &new_entry->offset,
3836 &new_entry_needs_copy);
3837
3838 new_entry->needs_copy = new_entry_needs_copy;
3839
3840
3841 if (result != KERN_SUCCESS) {
3842 vm_map_copy_entry_dispose(copy, new_entry);
3843
3844 vm_map_lock(src_map);
3845 RETURN(result);
3846 }
3847
3848 }
3849
3850 /*
3851 * Throw away the extra reference
3852 */
3853
3854 vm_object_deallocate(src_object);
3855
3856 /*
3857 * Verify that the map has not substantially
3858 * changed while the copy was being made.
3859 */
3860
3861 vm_map_lock(src_map); /* Increments timestamp once! */
3862
3863 if ((version.main_timestamp + 1) == src_map->timestamp)
3864 goto CopySuccessful;
3865
3866 /*
3867 * Simple version comparison failed.
3868 *
3869 * Retry the lookup and verify that the
3870 * same object/offset are still present.
3871 *
3872 * [Note: a memory manager that colludes with
3873 * the calling task can detect that we have
3874 * cheated. While the map was unlocked, the
3875 * mapping could have been changed and restored.]
3876 */
3877
3878 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
3879 vm_map_copy_entry_dispose(copy, new_entry);
3880 RETURN(KERN_INVALID_ADDRESS);
3881 }
3882
3883 src_entry = tmp_entry;
3884 vm_map_clip_start(src_map, src_entry, src_start);
3885
3886 if ((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE)
3887 goto VerificationFailed;
3888
3889 if (src_entry->vme_end < new_entry->vme_end)
3890 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start;
3891
3892 if ((src_entry->object.vm_object != src_object) ||
3893 (src_entry->offset != src_offset) ) {
3894
3895 /*
3896 * Verification failed.
3897 *
3898 * Start over with this top-level entry.
3899 */
3900
3901 VerificationFailed: ;
3902
3903 vm_object_deallocate(new_entry->object.vm_object);
3904 vm_map_copy_entry_dispose(copy, new_entry);
3905 tmp_entry = src_entry;
3906 continue;
3907 }
3908
3909 /*
3910 * Verification succeeded.
3911 */
3912
3913 CopySuccessful: ;
3914
3915 /*
3916 * Link in the new copy entry.
3917 */
3918
3919 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
3920 new_entry);
3921
3922 /*
3923 * Determine whether the entire region
3924 * has been copied.
3925 */
3926 src_start = new_entry->vme_end;
3927 if ((src_start >= src_end) && (src_end != 0))
3928 break;
3929
3930 /*
3931 * Verify that there are no gaps in the region
3932 */
3933
3934 tmp_entry = src_entry->vme_next;
3935 if (tmp_entry->vme_start != src_start)
3936 RETURN(KERN_INVALID_ADDRESS);
3937 }
3938
3939 /*
3940 * If the source should be destroyed, do it now, since the
3941 * copy was successful.
3942 */
3943 if (src_destroy)
3944 (void) vm_map_delete(src_map, trunc_page(src_addr), src_end);
3945
3946 vm_map_unlock(src_map);
3947
3948 *copy_result = copy;
3949 return KERN_SUCCESS;
3950
3951 #undef RETURN
3952 }
3953
3954 /*
3955 * vm_map_copyin_object:
3956 *
3957 * Create a copy object from an object.
3958 * Our caller donates an object reference.
3959 */
3960
3961 kern_return_t vm_map_copyin_object(
3962 vm_object_t object,
3963 vm_offset_t offset, /* offset of region in object */
3964 vm_size_t size, /* size of region in object */
3965 vm_map_copy_t *copy_result) /* OUT */
3966 {
3967 vm_map_copy_t copy; /* Resulting copy */
3968
3969 /*
3970 * We drop the object into a special copy object
3971 * that contains the object directly. These copy objects
3972 * are distinguished by entries_pageable == FALSE
3973 * and null links.
3974 */
3975
3976 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
3977 vm_map_copy_first_entry(copy) =
3978 vm_map_copy_last_entry(copy) = VM_MAP_ENTRY_NULL;
3979 copy->type = VM_MAP_COPY_OBJECT;
3980 copy->cpy_object = object;
3981 copy->offset = offset;
3982 copy->size = size;
3983
3984 *copy_result = copy;
3985 return KERN_SUCCESS;
3986 }
3987
3988 /*
3989 * vm_map_copyin_page_list_cont:
3990 *
3991 * Continuation routine for vm_map_copyin_page_list.
3992 *
3993 * If vm_map_copyin_page_list can't fit the entire vm range
3994 * into a single page list object, it creates a continuation.
3995 * When the target of the operation has used the pages in the
3996 * initial page list, it invokes the continuation, which calls
3997 * this routine. If an error happens, the continuation is aborted
3998 * (abort arg to this routine is TRUE). To avoid deadlocks, the
3999 * pages are discarded from the initial page list before invoking
4000 * the continuation.
4001 *
4002 * NOTE: This is not the same sort of continuation used by
4003 * the scheduler.
4004 */
4005
4006 kern_return_t vm_map_copyin_page_list_cont(
4007 void * args,
4008 vm_map_copy_t *copy_result) /* OUT */
4009 {
4010 vm_map_copyin_args_t cont_args = (vm_map_copyin_args_t) args;
4011 kern_return_t result = KERN_SUCCESS;
4012 register boolean_t do_abort, src_destroy, src_destroy_only;
4013
4014 /*
4015 * Check for cases that only require memory destruction.
4016 */
4017 do_abort = (copy_result == (vm_map_copy_t *) 0);
4018 src_destroy = (cont_args->destroy_len != (vm_size_t) 0);
4019 src_destroy_only = (cont_args->src_len == (vm_size_t) 0);
4020
4021 if (do_abort || src_destroy_only) {
4022 if (src_destroy)
4023 result = vm_map_remove(cont_args->map,
4024 cont_args->destroy_addr,
4025 cont_args->destroy_addr + cont_args->destroy_len);
4026 if (!do_abort)
4027 *copy_result = VM_MAP_COPY_NULL;
4028 }
4029 else {
4030 result = vm_map_copyin_page_list(cont_args->map,
4031 cont_args->src_addr, cont_args->src_len, src_destroy,
4032 cont_args->steal_pages, copy_result, TRUE);
4033
4034 if (src_destroy && !cont_args->steal_pages &&
4035 vm_map_copy_has_cont(*copy_result)) {
4036 vm_map_copyin_args_t new_args;
4037 /*
4038 * Transfer old destroy info.
4039 */
4040 new_args = (vm_map_copyin_args_t)
4041 (*copy_result)->cpy_cont_args;
4042 new_args->destroy_addr = cont_args->destroy_addr;
4043 new_args->destroy_len = cont_args->destroy_len;
4044 }
4045 }
4046
4047 vm_map_deallocate(cont_args->map);
4048 kfree((vm_offset_t)cont_args, sizeof(vm_map_copyin_args_data_t));
4049
4050 return result;
4051 }
4052
4053 /*
4054 * vm_map_copyin_page_list:
4055 *
4056 * This is a variant of vm_map_copyin that copies in a list of pages.
4057 * If steal_pages is TRUE, the pages are only in the returned list.
4058 * If steal_pages is FALSE, the pages are busy and still in their
4059 * objects. A continuation may be returned if not all the pages fit:
4060 * the recipient of this copy_result must be prepared to deal with it.
4061 */
4062
4063 kern_return_t vm_map_copyin_page_list(
4064 vm_map_t src_map,
4065 vm_offset_t src_addr,
4066 vm_size_t len,
4067 boolean_t src_destroy,
4068 boolean_t steal_pages,
4069 vm_map_copy_t *copy_result, /* OUT */
4070 boolean_t is_cont)
4071 {
4072 vm_map_entry_t src_entry;
4073 vm_page_t m;
4074 vm_offset_t src_start;
4075 vm_offset_t src_end;
4076 vm_size_t src_size;
4077 register
4078 vm_object_t src_object;
4079 register
4080 vm_offset_t src_offset;
4081 vm_offset_t src_last_offset;
4082 register
4083 vm_map_copy_t copy; /* Resulting copy */
4084 kern_return_t result = KERN_SUCCESS;
4085 boolean_t need_map_lookup;
4086 vm_map_copyin_args_t cont_args;
4087
4088 /*
4089 * If steal_pages is FALSE, this leaves busy pages in
4090 * the object. A continuation must be used if src_destroy
4091 * is true in this case (!steal_pages && src_destroy).
4092 *
4093 * XXX Still have a more general problem of what happens
4094 * XXX if the same page occurs twice in a list. Deadlock
4095 * XXX can happen if vm_fault_page was called. A
4096 * XXX possible solution is to use a continuation if vm_fault_page
4097 * XXX is called and we cross a map entry boundary.
4098 */
4099
4100 /*
4101 * Check for copies of zero bytes.
4102 */
4103
4104 if (len == 0) {
4105 *copy_result = VM_MAP_COPY_NULL;
4106 return KERN_SUCCESS;
4107 }
4108
4109 /*
4110 * Compute start and end of region
4111 */
4112
4113 src_start = trunc_page(src_addr);
4114 src_end = round_page(src_addr + len);
4115
4116 /*
4117 * Check that the end address doesn't overflow
4118 */
4119
4120 if (src_end <= src_start && (src_end < src_start || src_start != 0)) {
4121 return KERN_INVALID_ADDRESS;
4122 }
4123
4124 /*
4125 * Allocate a header element for the page list.
4126 *
4127 * Record original offset and size, as caller may not
4128 * be page-aligned.
4129 */
4130
4131 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
4132 copy->type = VM_MAP_COPY_PAGE_LIST;
4133 copy->cpy_npages = 0;
4134 copy->offset = src_addr;
4135 copy->size = len;
4136 copy->cpy_cont = (vm_map_copyin_cont_t) 0;
4137 copy->cpy_cont_args = (void *) VM_MAP_COPYIN_ARGS_NULL;
4138
4139 /*
4140 * Find the beginning of the region.
4141 */
4142
4143 do_map_lookup:
4144
4145 vm_map_lock(src_map);
4146
4147 if (!vm_map_lookup_entry(src_map, src_start, &src_entry)) {
4148 result = KERN_INVALID_ADDRESS;
4149 goto error;
4150 }
4151 need_map_lookup = FALSE;
4152
4153 /*
4154 * Go through entries until we get to the end.
4155 */
4156
4157 while (TRUE) {
4158
4159 if (! (src_entry->protection & VM_PROT_READ)) {
4160 result = KERN_PROTECTION_FAILURE;
4161 goto error;
4162 }
4163
4164 if (src_end > src_entry->vme_end)
4165 src_size = src_entry->vme_end - src_start;
4166 else
4167 src_size = src_end - src_start;
4168
4169 src_object = src_entry->object.vm_object;
4170 src_offset = src_entry->offset +
4171 (src_start - src_entry->vme_start);
4172
4173 /*
4174 * If src_object is NULL, allocate it now;
4175 * we're going to fault on it shortly.
4176 */
4177 if (src_object == VM_OBJECT_NULL) {
4178 src_object = vm_object_allocate((vm_size_t)
4179 src_entry->vme_end -
4180 src_entry->vme_start);
4181 src_entry->object.vm_object = src_object;
4182 }
4183
4184 /*
4185 * Iterate over pages. Fault in ones that aren't present.
4186 */
4187 src_last_offset = src_offset + src_size;
4188 for (; (src_offset < src_last_offset && !need_map_lookup);
4189 src_offset += PAGE_SIZE, src_start += PAGE_SIZE) {
4190
4191 if (copy->cpy_npages == VM_MAP_COPY_PAGE_LIST_MAX) {
4192 make_continuation:
4193 /*
4194 * At this point we have the max number of
4195 * pages busy for this thread that we're
4196 * willing to allow. Stop here and record
4197 * arguments for the remainder. Note:
4198 * this means that this routine isn't atomic,
4199 * but that's the breaks. Note that only
4200 * the first vm_map_copy_t that comes back
4201 * from this routine has the right offset
4202 * and size; those from continuations are
4203 * page rounded, and short by the amount
4204 * already done.
4205 *
4206 * Reset src_end so the src_destroy
4207 * code at the bottom doesn't do
4208 * something stupid.
4209 */
4210
4211 cont_args = (vm_map_copyin_args_t)
4212 kalloc(sizeof(vm_map_copyin_args_data_t));
4213 cont_args->map = src_map;
4214 vm_map_reference(src_map);
4215 cont_args->src_addr = src_start;
4216 cont_args->src_len = len - (src_start - src_addr);
4217 if (src_destroy) {
4218 cont_args->destroy_addr = cont_args->src_addr;
4219 cont_args->destroy_len = cont_args->src_len;
4220 }
4221 else {
4222 cont_args->destroy_addr = (vm_offset_t) 0;
4223 cont_args->destroy_len = (vm_offset_t) 0;
4224 }
4225 cont_args->steal_pages = steal_pages;
4226
4227 copy->cpy_cont_args = (void *) cont_args;
4228 copy->cpy_cont = vm_map_copyin_page_list_cont;
4229
4230 src_end = src_start;
4231 vm_map_clip_end(src_map, src_entry, src_end);
4232 break;
4233 }
4234
4235 /*
4236 * Try to find the page of data.
4237 */
4238 vm_object_lock(src_object);
4239 vm_object_paging_begin(src_object);
4240 if (((m = vm_page_lookup(src_object, src_offset)) !=
4241 VM_PAGE_NULL) && !m->busy && !m->fictitious &&
4242 !m->absent && !m->error) {
4243
4244 /*
4245 * This is the page. Mark it busy
4246 * and keep the paging reference on
4247 * the object whilst we do our thing.
4248 */
4249 m->busy = TRUE;
4250
4251 /*
4252 * Also write-protect the page, so
4253 * that the map`s owner cannot change
4254 * the data. The busy bit will prevent
4255 * faults on the page from succeeding
4256 * until the copy is released; after
4257 * that, the page can be re-entered
4258 * as writable, since we didn`t alter
4259 * the map entry. This scheme is a
4260 * cheap copy-on-write.
4261 *
4262 * Don`t forget the protection and
4263 * the page_lock value!
4264 *
4265 * If the source is being destroyed
4266 * AND not shared writable, we don`t
4267 * have to protect the page, since
4268 * we will destroy the (only)
4269 * writable mapping later.
4270 */
4271 if (!src_destroy ||
4272 src_object->use_shared_copy)
4273 {
4274 pmap_page_protect(m->phys_addr,
4275 src_entry->protection
4276 & ~m->page_lock
4277 & ~VM_PROT_WRITE);
4278 }
4279
4280 }
4281 else {
4282 vm_prot_t result_prot;
4283 vm_page_t top_page;
4284 kern_return_t kr;
4285
4286 /*
4287 * Have to fault the page in; must
4288 * unlock the map to do so. While
4289 * the map is unlocked, anything
4290 * can happen, we must lookup the
4291 * map entry before continuing.
4292 */
4293 vm_map_unlock(src_map);
4294 need_map_lookup = TRUE;
4295 retry:
4296 result_prot = VM_PROT_READ;
4297
4298 kr = vm_fault_page(src_object, src_offset,
4299 VM_PROT_READ, FALSE, FALSE,
4300 &result_prot, &m, &top_page,
4301 FALSE, CONTINUE_NULL);
4302 /*
4303 * Cope with what happened.
4304 */
4305 switch (kr) {
4306 case VM_FAULT_SUCCESS:
4307 break;
4308 case VM_FAULT_INTERRUPTED: /* ??? */
4309 case VM_FAULT_RETRY:
4310 vm_object_lock(src_object);
4311 vm_object_paging_begin(src_object);
4312 goto retry;
4313 case VM_FAULT_MEMORY_SHORTAGE:
4314 VM_PAGE_WAIT(CONTINUE_NULL);
4315 vm_object_lock(src_object);
4316 vm_object_paging_begin(src_object);
4317 goto retry;
4318 case VM_FAULT_FICTITIOUS_SHORTAGE:
4319 vm_page_more_fictitious();
4320 vm_object_lock(src_object);
4321 vm_object_paging_begin(src_object);
4322 goto retry;
4323 case VM_FAULT_MEMORY_ERROR:
4324 /*
4325 * Something broke. If this
4326 * is a continuation, return
4327 * a partial result if possible,
4328 * else fail the whole thing.
4329 * In the continuation case, the
4330 * next continuation call will
4331 * get this error if it persists.
4332 */
4333 vm_map_lock(src_map);
4334 if (is_cont &&
4335 copy->cpy_npages != 0)
4336 goto make_continuation;
4337
4338 result = KERN_MEMORY_ERROR;
4339 goto error;
4340 }
4341
4342 if (top_page != VM_PAGE_NULL) {
4343 vm_object_lock(src_object);
4344 VM_PAGE_FREE(top_page);
4345 vm_object_paging_end(src_object);
4346 vm_object_unlock(src_object);
4347 }
4348
4349 /*
4350 * We do not need to write-protect
4351 * the page, since it cannot have
4352 * been in the pmap (and we did not
4353 * enter it above). The busy bit
4354 * will protect the page from being
4355 * entered as writable until it is
4356 * unlocked.
4357 */
4358
4359 }
4360
4361 /*
4362 * The page is busy, its object is locked, and
4363 * we have a paging reference on it. Either
4364 * the map is locked, or need_map_lookup is
4365 * TRUE.
4366 *
4367 * Put the page in the page list.
4368 */
4369 copy->cpy_page_list[copy->cpy_npages++] = m;
4370 vm_object_unlock(m->object);
4371 }
4372
4373 /*
4374 * DETERMINE whether the entire region
4375 * has been copied.
4376 */
4377 if (src_start >= src_end && src_end != 0) {
4378 if (need_map_lookup)
4379 vm_map_lock(src_map);
4380 break;
4381 }
4382
4383 /*
4384 * If need_map_lookup is TRUE, have to start over with
4385 * another map lookup. Note that we dropped the map
4386 * lock (to call vm_fault_page) above only in this case.
4387 */
4388 if (need_map_lookup)
4389 goto do_map_lookup;
4390
4391 /*
4392 * Verify that there are no gaps in the region
4393 */
4394
4395 src_start = src_entry->vme_end;
4396 src_entry = src_entry->vme_next;
4397 if (src_entry->vme_start != src_start) {
4398 result = KERN_INVALID_ADDRESS;
4399 goto error;
4400 }
4401 }
4402
4403 /*
4404 * If steal_pages is true, make sure all
4405 * pages in the copy are not in any object
4406 * We try to remove them from the original
4407 * object, but we may have to copy them.
4408 *
4409 * At this point every page in the list is busy
4410 * and holds a paging reference to its object.
4411 * When we're done stealing, every page is busy,
4412 * and in no object (m->tabled == FALSE).
4413 */
4414 src_start = trunc_page(src_addr);
4415 if (steal_pages) {
4416 register int i;
4417 vm_offset_t unwire_end;
4418
4419 unwire_end = src_start;
4420 for (i = 0; i < copy->cpy_npages; i++) {
4421
4422 /*
4423 * Remove the page from its object if it
4424 * can be stolen. It can be stolen if:
4425 *
4426 * (1) The source is being destroyed,
4427 * the object is temporary, and
4428 * not shared.
4429 * (2) The page is not precious.
4430 *
4431 * The not shared check consists of two
4432 * parts: (a) there are no objects that
4433 * shadow this object. (b) it is not the
4434 * object in any shared map entries (i.e.,
4435 * use_shared_copy is not set).
4436 *
4437 * The first check (a) means that we can't
4438 * steal pages from objects that are not
4439 * at the top of their shadow chains. This
4440 * should not be a frequent occurrence.
4441 *
4442 * Stealing wired pages requires telling the
4443 * pmap module to let go of them.
4444 *
4445 * NOTE: stealing clean pages from objects
4446 * whose mappings survive requires a call to
4447 * the pmap module. Maybe later.
4448 */
4449 m = copy->cpy_page_list[i];
4450 src_object = m->object;
4451 vm_object_lock(src_object);
4452
4453 if (src_destroy &&
4454 src_object->temporary &&
4455 (!src_object->shadowed) &&
4456 (!src_object->use_shared_copy) &&
4457 !m->precious) {
4458 vm_offset_t page_vaddr;
4459
4460 page_vaddr = src_start + (i * PAGE_SIZE);
4461 if (m->wire_count > 0) {
4462
4463 assert(m->wire_count == 1);
4464 /*
4465 * In order to steal a wired
4466 * page, we have to unwire it
4467 * first. We do this inline
4468 * here because we have the page.
4469 *
4470 * Step 1: Unwire the map entry.
4471 * Also tell the pmap module
4472 * that this piece of the
4473 * pmap is pageable.
4474 */
4475 vm_object_unlock(src_object);
4476 if (page_vaddr >= unwire_end) {
4477 if (!vm_map_lookup_entry(src_map,
4478 page_vaddr, &src_entry))
4479 panic("vm_map_copyin_page_list: missing wired map entry");
4480
4481 vm_map_clip_start(src_map, src_entry,
4482 page_vaddr);
4483 vm_map_clip_end(src_map, src_entry,
4484 src_start + src_size);
4485
4486 assert(src_entry->wired_count > 0);
4487 src_entry->wired_count = 0;
4488 src_entry->user_wired_count = 0;
4489 unwire_end = src_entry->vme_end;
4490 pmap_pageable(vm_map_pmap(src_map),
4491 page_vaddr, unwire_end, TRUE);
4492 }
4493
4494 /*
4495 * Step 2: Unwire the page.
4496 * pmap_remove handles this for us.
4497 */
4498 vm_object_lock(src_object);
4499 }
4500
4501 /*
4502 * Don't need to remove the mapping;
4503 * vm_map_delete will handle it.
4504 *
4505 * Steal the page. Setting the wire count
4506 * to zero is vm_page_unwire without
4507 * activating the page.
4508 */
4509 vm_page_lock_queues();
4510 vm_page_remove(m);
4511 if (m->wire_count > 0) {
4512 m->wire_count = 0;
4513 vm_page_wire_count--;
4514 } else {
4515 VM_PAGE_QUEUES_REMOVE(m);
4516 }
4517 vm_page_unlock_queues();
4518 }
4519 else {
4520 /*
4521 * Have to copy this page. Have to
4522 * unlock the map while copying,
4523 * hence no further page stealing.
4524 * Hence just copy all the pages.
4525 * Unlock the map while copying;
4526 * This means no further page stealing.
4527 */
4528 vm_object_unlock(src_object);
4529 vm_map_unlock(src_map);
4530
4531 vm_map_copy_steal_pages(copy);
4532
4533 vm_map_lock(src_map);
4534 break;
4535 }
4536
4537 vm_object_paging_end(src_object);
4538 vm_object_unlock(src_object);
4539 }
4540
4541 /*
4542 * If the source should be destroyed, do it now, since the
4543 * copy was successful.
4544 */
4545
4546 if (src_destroy) {
4547 (void) vm_map_delete(src_map, src_start, src_end);
4548 }
4549 }
4550 else {
4551 /*
4552 * !steal_pages leaves busy pages in the map.
4553 * This will cause src_destroy to hang. Use
4554 * a continuation to prevent this.
4555 */
4556 if (src_destroy && !vm_map_copy_has_cont(copy)) {
4557 cont_args = (vm_map_copyin_args_t)
4558 kalloc(sizeof(vm_map_copyin_args_data_t));
4559 vm_map_reference(src_map);
4560 cont_args->map = src_map;
4561 cont_args->src_addr = (vm_offset_t) 0;
4562 cont_args->src_len = (vm_size_t) 0;
4563 cont_args->destroy_addr = src_start;
4564 cont_args->destroy_len = src_end - src_start;
4565 cont_args->steal_pages = FALSE;
4566
4567 copy->cpy_cont_args = (void *) cont_args;
4568 copy->cpy_cont = vm_map_copyin_page_list_cont;
4569 }
4570
4571 }
4572
4573 vm_map_unlock(src_map);
4574
4575 *copy_result = copy;
4576 return result;
4577
4578 error:
4579 vm_map_unlock(src_map);
4580 vm_map_copy_discard(copy);
4581 return result;
4582 }
4583
4584 /*
4585 * vm_map_fork:
4586 *
4587 * Create and return a new map based on the old
4588 * map, according to the inheritance values on the
4589 * regions in that map.
4590 *
4591 * The source map must not be locked.
4592 */
4593 vm_map_t vm_map_fork(
4594 vm_map_t old_map)
4595 {
4596 vm_map_t new_map;
4597 register
4598 vm_map_entry_t old_entry;
4599 register
4600 vm_map_entry_t new_entry;
4601 pmap_t new_pmap = pmap_create((vm_size_t) 0);
4602 vm_size_t new_size = 0;
4603 vm_size_t entry_size;
4604 register
4605 vm_object_t object;
4606
4607 vm_map_lock(old_map);
4608
4609 new_map = vm_map_create(new_pmap,
4610 old_map->min_offset,
4611 old_map->max_offset,
4612 old_map->hdr.entries_pageable);
4613
4614 for (
4615 old_entry = vm_map_first_entry(old_map);
4616 old_entry != vm_map_to_entry(old_map);
4617 ) {
4618 if (old_entry->is_sub_map)
4619 panic("vm_map_fork: encountered a submap");
4620
4621 entry_size = (old_entry->vme_end - old_entry->vme_start);
4622
4623 switch (old_entry->inheritance) {
4624 case VM_INHERIT_NONE:
4625 break;
4626
4627 case VM_INHERIT_SHARE:
4628 /*
4629 * New sharing code. New map entry
4630 * references original object. Temporary
4631 * objects use asynchronous copy algorithm for
4632 * future copies. First make sure we have
4633 * the right object. If we need a shadow,
4634 * or someone else already has one, then
4635 * make a new shadow and share it.
4636 */
4637
4638 object = old_entry->object.vm_object;
4639 if (object == VM_OBJECT_NULL) {
4640 object = vm_object_allocate(
4641 (vm_size_t)(old_entry->vme_end -
4642 old_entry->vme_start));
4643 old_entry->offset = 0;
4644 old_entry->object.vm_object = object;
4645 assert(!old_entry->needs_copy);
4646 }
4647 else if (old_entry->needs_copy || object->shadowed ||
4648 (object->temporary && !old_entry->is_shared &&
4649 object->size > (vm_size_t)(old_entry->vme_end -
4650 old_entry->vme_start))) {
4651
4652 assert(object->temporary);
4653 assert(!(object->shadowed && old_entry->is_shared));
4654 vm_object_shadow(
4655 &old_entry->object.vm_object,
4656 &old_entry->offset,
4657 (vm_size_t) (old_entry->vme_end -
4658 old_entry->vme_start));
4659
4660 /*
4661 * If we're making a shadow for other than
4662 * copy on write reasons, then we have
4663 * to remove write permission.
4664 */
4665
4666 if (!old_entry->needs_copy &&
4667 (old_entry->protection & VM_PROT_WRITE)) {
4668 pmap_protect(vm_map_pmap(old_map),
4669 old_entry->vme_start,
4670 old_entry->vme_end,
4671 old_entry->protection &
4672 ~VM_PROT_WRITE);
4673 }
4674 old_entry->needs_copy = FALSE;
4675 object = old_entry->object.vm_object;
4676 }
4677
4678 /*
4679 * Set use_shared_copy to indicate that
4680 * object must use shared (delayed) copy-on
4681 * write. This is ignored for permanent objects.
4682 * Bump the reference count for the new entry
4683 */
4684
4685 vm_object_lock(object);
4686 object->use_shared_copy = TRUE;
4687 object->ref_count++;
4688 vm_object_unlock(object);
4689
4690 /*
4691 * Clone the entry, using object ref from above.
4692 * Mark both entries as shared.
4693 */
4694
4695 new_entry = vm_map_entry_create(new_map);
4696 #if NET_ATM
4697 if (old_entry->projected_on != 0) {
4698 /*
4699 * If entry is projected buffer, clone the
4700 * entry exactly.
4701 */
4702 vm_map_entry_copy_full(new_entry, old_entry);
4703 }
4704 else {
4705 #endif /* NET_ATM */
4706 vm_map_entry_copy(new_entry, old_entry);
4707 old_entry->is_shared = TRUE;
4708 new_entry->is_shared = TRUE;
4709 #if NET_ATM
4710 }
4711 #endif /* NET_ATM */
4712
4713 /*
4714 * Insert the entry into the new map -- we
4715 * know we're inserting at the end of the new
4716 * map.
4717 */
4718
4719 vm_map_entry_link(
4720 new_map,
4721 vm_map_last_entry(new_map),
4722 new_entry);
4723
4724 /*
4725 * Update the physical map
4726 */
4727
4728 pmap_copy(new_map->pmap, old_map->pmap,
4729 new_entry->vme_start,
4730 entry_size,
4731 old_entry->vme_start);
4732
4733 new_size += entry_size;
4734 break;
4735
4736 case VM_INHERIT_COPY:
4737 if (old_entry->wired_count == 0) {
4738 boolean_t src_needs_copy;
4739 boolean_t new_entry_needs_copy;
4740
4741 new_entry = vm_map_entry_create(new_map);
4742 vm_map_entry_copy(new_entry, old_entry);
4743
4744 if (vm_object_copy_temporary(
4745 &new_entry->object.vm_object,
4746 &new_entry->offset,
4747 &src_needs_copy,
4748 &new_entry_needs_copy)) {
4749
4750 /*
4751 * Handle copy-on-write obligations
4752 */
4753
4754 if (src_needs_copy && !old_entry->needs_copy) {
4755 vm_object_pmap_protect(
4756 old_entry->object.vm_object,
4757 old_entry->offset,
4758 entry_size,
4759 (old_entry->is_shared ?
4760 PMAP_NULL :
4761 old_map->pmap),
4762 old_entry->vme_start,
4763 old_entry->protection &
4764 ~VM_PROT_WRITE);
4765
4766 old_entry->needs_copy = TRUE;
4767 }
4768
4769 new_entry->needs_copy = new_entry_needs_copy;
4770
4771 /*
4772 * Insert the entry at the end
4773 * of the map.
4774 */
4775
4776 vm_map_entry_link(new_map,
4777 vm_map_last_entry(new_map),
4778 new_entry);
4779
4780
4781 new_size += entry_size;
4782 break;
4783 }
4784
4785 vm_map_entry_dispose(new_map, new_entry);
4786 }
4787
4788 /* INNER BLOCK (copy cannot be optimized) */ {
4789
4790 vm_offset_t start = old_entry->vme_start;
4791 vm_map_copy_t copy;
4792 vm_map_entry_t last = vm_map_last_entry(new_map);
4793
4794 vm_map_unlock(old_map);
4795 if (vm_map_copyin(old_map,
4796 start,
4797 entry_size,
4798 FALSE,
4799 ©)
4800 != KERN_SUCCESS) {
4801 vm_map_lock(old_map);
4802 if (!vm_map_lookup_entry(old_map, start, &last))
4803 last = last->vme_next;
4804 old_entry = last;
4805 /*
4806 * For some error returns, want to
4807 * skip to the next element.
4808 */
4809
4810 continue;
4811 }
4812
4813 /*
4814 * Insert the copy into the new map
4815 */
4816
4817 vm_map_copy_insert(new_map, last, copy);
4818 new_size += entry_size;
4819
4820 /*
4821 * Pick up the traversal at the end of
4822 * the copied region.
4823 */
4824
4825 vm_map_lock(old_map);
4826 start += entry_size;
4827 if (!vm_map_lookup_entry(old_map, start, &last))
4828 last = last->vme_next;
4829 else
4830 vm_map_clip_start(old_map, last, start);
4831 old_entry = last;
4832
4833 continue;
4834 /* INNER BLOCK (copy cannot be optimized) */ }
4835 }
4836 old_entry = old_entry->vme_next;
4837 }
4838
4839 new_map->size = new_size;
4840 vm_map_unlock(old_map);
4841
4842 return new_map;
4843 }
4844
4845 /*
4846 * vm_map_lookup:
4847 *
4848 * Finds the VM object, offset, and
4849 * protection for a given virtual address in the
4850 * specified map, assuming a page fault of the
4851 * type specified.
4852 *
4853 * Returns the (object, offset, protection) for
4854 * this address, whether it is wired down, and whether
4855 * this map has the only reference to the data in question.
4856 * In order to later verify this lookup, a "version"
4857 * is returned.
4858 *
4859 * The map should not be locked; it will not be
4860 * locked on exit. In order to guarantee the
4861 * existence of the returned object, it is returned
4862 * locked.
4863 *
4864 * If a lookup is requested with "write protection"
4865 * specified, the map may be changed to perform virtual
4866 * copying operations, although the data referenced will
4867 * remain the same.
4868 */
4869 kern_return_t vm_map_lookup(
4870 vm_map_t *var_map, /* IN/OUT */
4871 register vm_offset_t vaddr,
4872 register vm_prot_t fault_type,
4873
4874 vm_map_version_t *out_version, /* OUT */
4875 vm_object_t *object, /* OUT */
4876 vm_offset_t *offset, /* OUT */
4877 vm_prot_t *out_prot, /* OUT */
4878 boolean_t *wired) /* OUT */
4879 {
4880 register vm_map_entry_t entry;
4881 register vm_map_t map = *var_map;
4882 register vm_prot_t prot;
4883
4884 RetryLookup: ;
4885
4886 /*
4887 * Lookup the faulting address.
4888 */
4889
4890 vm_map_lock_read(map);
4891
4892 #define RETURN(why) \
4893 { \
4894 vm_map_unlock_read(map); \
4895 return why; \
4896 }
4897
4898 /*
4899 * If the map has an interesting hint, try it before calling
4900 * full blown lookup routine.
4901 */
4902
4903 simple_lock(&map->hint_lock);
4904 entry = map->hint;
4905 simple_unlock(&map->hint_lock);
4906
4907 if ((entry == vm_map_to_entry(map)) ||
4908 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
4909 vm_map_entry_t tmp_entry;
4910
4911 /*
4912 * Entry was either not a valid hint, or the vaddr
4913 * was not contained in the entry, so do a full lookup.
4914 */
4915 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
4916 RETURN(KERN_INVALID_ADDRESS);
4917
4918 entry = tmp_entry;
4919 }
4920
4921 /*
4922 * Handle submaps.
4923 */
4924
4925 if (entry->is_sub_map) {
4926 vm_map_t old_map = map;
4927
4928 *var_map = map = entry->object.sub_map;
4929 vm_map_unlock_read(old_map);
4930 goto RetryLookup;
4931 }
4932
4933 /*
4934 * Check whether this task is allowed to have
4935 * this page.
4936 */
4937
4938 prot = entry->protection;
4939 if ((fault_type & (prot)) != fault_type)
4940 RETURN(KERN_PROTECTION_FAILURE);
4941
4942 /*
4943 * If this page is not pageable, we have to get
4944 * it for all possible accesses.
4945 */
4946
4947 if (*wired = (entry->wired_count != 0))
4948 prot = fault_type = entry->protection;
4949
4950 /*
4951 * If the entry was copy-on-write, we either ...
4952 */
4953
4954 if (entry->needs_copy) {
4955 /*
4956 * If we want to write the page, we may as well
4957 * handle that now since we've got the map locked.
4958 *
4959 * If we don't need to write the page, we just
4960 * demote the permissions allowed.
4961 */
4962
4963 if (fault_type & VM_PROT_WRITE) {
4964 /*
4965 * Make a new object, and place it in the
4966 * object chain. Note that no new references
4967 * have appeared -- one just moved from the
4968 * map to the new object.
4969 */
4970
4971 if (vm_map_lock_read_to_write(map)) {
4972 goto RetryLookup;
4973 }
4974 map->timestamp++;
4975
4976 vm_object_shadow(
4977 &entry->object.vm_object,
4978 &entry->offset,
4979 (vm_size_t) (entry->vme_end - entry->vme_start));
4980
4981 entry->needs_copy = FALSE;
4982
4983 vm_map_lock_write_to_read(map);
4984 }
4985 else {
4986 /*
4987 * We're attempting to read a copy-on-write
4988 * page -- don't allow writes.
4989 */
4990
4991 prot &= (~VM_PROT_WRITE);
4992 }
4993 }
4994
4995 /*
4996 * Create an object if necessary.
4997 */
4998 if (entry->object.vm_object == VM_OBJECT_NULL) {
4999
5000 if (vm_map_lock_read_to_write(map)) {
5001 goto RetryLookup;
5002 }
5003
5004 entry->object.vm_object = vm_object_allocate(
5005 (vm_size_t)(entry->vme_end - entry->vme_start));
5006 entry->offset = 0;
5007 vm_map_lock_write_to_read(map);
5008 }
5009
5010 /*
5011 * Return the object/offset from this entry. If the entry
5012 * was copy-on-write or empty, it has been fixed up. Also
5013 * return the protection.
5014 */
5015
5016 *offset = (vaddr - entry->vme_start) + entry->offset;
5017 *object = entry->object.vm_object;
5018 *out_prot = prot;
5019
5020 /*
5021 * Lock the object to prevent it from disappearing
5022 */
5023
5024 vm_object_lock(*object);
5025
5026 /*
5027 * Save the version number and unlock the map.
5028 */
5029
5030 out_version->main_timestamp = map->timestamp;
5031
5032 RETURN(KERN_SUCCESS);
5033
5034 #undef RETURN
5035 }
5036
5037 /*
5038 * vm_map_verify:
5039 *
5040 * Verifies that the map in question has not changed
5041 * since the given version. If successful, the map
5042 * will not change until vm_map_verify_done() is called.
5043 */
5044 boolean_t vm_map_verify(
5045 register
5046 vm_map_t map,
5047 register
5048 vm_map_version_t *version) /* REF */
5049 {
5050 boolean_t result;
5051
5052 vm_map_lock_read(map);
5053 result = (map->timestamp == version->main_timestamp);
5054
5055 if (!result)
5056 vm_map_unlock_read(map);
5057
5058 return result;
5059 }
5060
5061 /*
5062 * vm_map_verify_done:
5063 *
5064 * Releases locks acquired by a vm_map_verify.
5065 *
5066 * This is now a macro in vm/vm_map.h. It does a
5067 * vm_map_unlock_read on the map.
5068 */
5069
5070 /*
5071 * vm_region:
5072 *
5073 * User call to obtain information about a region in
5074 * a task's address map.
5075 */
5076
5077 kern_return_t vm_region(
5078 vm_map_t map,
5079 vm_offset_t *address, /* IN/OUT */
5080 vm_size_t *size, /* OUT */
5081 vm_prot_t *protection, /* OUT */
5082 vm_prot_t *max_protection, /* OUT */
5083 vm_inherit_t *inheritance, /* OUT */
5084 boolean_t *is_shared, /* OUT */
5085 ipc_port_t *object_name, /* OUT */
5086 vm_offset_t *offset_in_object) /* OUT */
5087 {
5088 vm_map_entry_t tmp_entry;
5089 register
5090 vm_map_entry_t entry;
5091 register
5092 vm_offset_t tmp_offset;
5093 vm_offset_t start;
5094
5095 if (map == VM_MAP_NULL)
5096 return KERN_INVALID_ARGUMENT;
5097
5098 start = *address;
5099
5100 vm_map_lock_read(map);
5101 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
5102 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
5103 vm_map_unlock_read(map);
5104 return KERN_NO_SPACE;
5105 }
5106 } else {
5107 entry = tmp_entry;
5108 }
5109
5110 start = entry->vme_start;
5111 *protection = entry->protection;
5112 *max_protection = entry->max_protection;
5113 *inheritance = entry->inheritance;
5114 *address = start;
5115 *size = (entry->vme_end - start);
5116
5117 tmp_offset = entry->offset;
5118
5119
5120 if (entry->is_sub_map) {
5121 *is_shared = FALSE;
5122 *object_name = IP_NULL;
5123 *offset_in_object = tmp_offset;
5124 } else {
5125 *is_shared = entry->is_shared;
5126 *object_name = vm_object_name(entry->object.vm_object);
5127 *offset_in_object = tmp_offset;
5128 }
5129
5130 vm_map_unlock_read(map);
5131
5132 return KERN_SUCCESS;
5133 }
5134
5135 /*
5136 * Routine: vm_map_simplify
5137 *
5138 * Description:
5139 * Attempt to simplify the map representation in
5140 * the vicinity of the given starting address.
5141 * Note:
5142 * This routine is intended primarily to keep the
5143 * kernel maps more compact -- they generally don't
5144 * benefit from the "expand a map entry" technology
5145 * at allocation time because the adjacent entry
5146 * is often wired down.
5147 */
5148 void vm_map_simplify(
5149 vm_map_t map,
5150 vm_offset_t start)
5151 {
5152 vm_map_entry_t this_entry;
5153 vm_map_entry_t prev_entry;
5154
5155 vm_map_lock(map);
5156 if (
5157 (vm_map_lookup_entry(map, start, &this_entry)) &&
5158 ((prev_entry = this_entry->vme_prev) != vm_map_to_entry(map)) &&
5159
5160 (prev_entry->vme_end == start) &&
5161
5162 (prev_entry->is_shared == FALSE) &&
5163 (prev_entry->is_sub_map == FALSE) &&
5164
5165 (this_entry->is_shared == FALSE) &&
5166 (this_entry->is_sub_map == FALSE) &&
5167
5168 (prev_entry->inheritance == this_entry->inheritance) &&
5169 (prev_entry->protection == this_entry->protection) &&
5170 (prev_entry->max_protection == this_entry->max_protection) &&
5171 (prev_entry->wired_count == this_entry->wired_count) &&
5172 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
5173
5174 (prev_entry->needs_copy == this_entry->needs_copy) &&
5175
5176 (prev_entry->object.vm_object == this_entry->object.vm_object) &&
5177 ((prev_entry->offset + (prev_entry->vme_end - prev_entry->vme_start))
5178 == this_entry->offset)
5179 #if NET_ATM
5180 && (prev_entry->projected_on == 0)
5181 && (this_entry->projected_on == 0)
5182 #endif /* NET_ATM */
5183 ) {
5184 if (map->first_free == this_entry)
5185 map->first_free = prev_entry;
5186
5187 SAVE_HINT(map, prev_entry);
5188 vm_map_entry_unlink(map, this_entry);
5189 prev_entry->vme_end = this_entry->vme_end;
5190 vm_object_deallocate(this_entry->object.vm_object);
5191 vm_map_entry_dispose(map, this_entry);
5192 }
5193 vm_map_unlock(map);
5194 }
5195
5196
5197 /*
5198 * Routine: vm_map_machine_attribute
5199 * Purpose:
5200 * Provide machine-specific attributes to mappings,
5201 * such as cachability etc. for machines that provide
5202 * them. NUMA architectures and machines with big/strange
5203 * caches will use this.
5204 * Note:
5205 * Responsibilities for locking and checking are handled here,
5206 * everything else in the pmap module. If any non-volatile
5207 * information must be kept, the pmap module should handle
5208 * it itself. [This assumes that attributes do not
5209 * need to be inherited, which seems ok to me]
5210 */
5211 kern_return_t vm_map_machine_attribute(
5212 vm_map_t map,
5213 vm_offset_t address,
5214 vm_size_t size,
5215 vm_machine_attribute_t attribute,
5216 vm_machine_attribute_val_t* value) /* IN/OUT */
5217 {
5218 kern_return_t ret;
5219
5220 if (address < vm_map_min(map) ||
5221 (address + size) > vm_map_max(map))
5222 return KERN_INVALID_ARGUMENT;
5223
5224 vm_map_lock(map);
5225
5226 ret = pmap_attribute(map->pmap, address, size, attribute, value);
5227
5228 vm_map_unlock(map);
5229
5230 return ret;
5231 }
5232
5233 #include <mach_kdb.h>
5234
5235
5236 #if MACH_KDB
5237 #include <ddb/db_output.h>
5238
5239 /*
5240 * vm_map_print: [ debug ]
5241 */
5242 void vm_map_print(
5243 register vm_map_t map)
5244 {
5245 register vm_map_entry_t entry;
5246
5247 db_iprintf("Task map 0x%X: pmap=0x%X,",
5248 (vm_offset_t) map, (vm_offset_t) (map->pmap));
5249 db_printf("ref=%d,nentries=%d,", map->ref_count, map->hdr.nentries);
5250 db_printf("version=%d\n", map->timestamp);
5251 db_indent += 2;
5252 for (entry = vm_map_first_entry(map);
5253 entry != vm_map_to_entry(map);
5254 entry = entry->vme_next) {
5255 static char *inheritance_name[3] = { "share", "copy", "none"};
5256
5257 db_iprintf("map entry 0x%X: ", (vm_offset_t) entry);
5258 db_printf("start=0x%X, end=0x%X, ",
5259 entry->vme_start, entry->vme_end);
5260 db_printf("prot=%X/%X/%s, ",
5261 entry->protection,
5262 entry->max_protection,
5263 inheritance_name[entry->inheritance]);
5264 if (entry->wired_count != 0) {
5265 db_printf("wired(");
5266 if (entry->user_wired_count != 0)
5267 db_printf("u");
5268 if (entry->wired_count >
5269 ((entry->user_wired_count == 0) ? 0 : 1))
5270 db_printf("k");
5271 db_printf(") ");
5272 }
5273 if (entry->in_transition) {
5274 db_printf("in transition");
5275 if (entry->needs_wakeup)
5276 db_printf("(wake request)");
5277 db_printf(", ");
5278 }
5279 if (entry->is_sub_map) {
5280 db_printf("submap=0x%X, offset=0x%X\n",
5281 (vm_offset_t) entry->object.sub_map,
5282 entry->offset);
5283 } else {
5284 db_printf("object=0x%X, offset=0x%X",
5285 (vm_offset_t) entry->object.vm_object,
5286 entry->offset);
5287 if (entry->is_shared)
5288 db_printf(", shared");
5289 if (entry->needs_copy)
5290 db_printf(", copy needed");
5291 db_printf("\n");
5292
5293 if ((entry->vme_prev == vm_map_to_entry(map)) ||
5294 (entry->vme_prev->object.vm_object != entry->object.vm_object)) {
5295 db_indent += 2;
5296 vm_object_print(entry->object.vm_object);
5297 db_indent -= 2;
5298 }
5299 }
5300 }
5301 db_indent -= 2;
5302 }
5303
5304 /*
5305 * Routine: vm_map_copy_print
5306 * Purpose:
5307 * Pretty-print a copy object for ddb.
5308 */
5309
5310 void vm_map_copy_print(
5311 vm_map_copy_t copy)
5312 {
5313 int i, npages;
5314
5315 db_printf("copy object 0x%x\n", copy);
5316
5317 db_indent += 2;
5318
5319 db_iprintf("type=%d", copy->type);
5320 switch (copy->type) {
5321 case VM_MAP_COPY_ENTRY_LIST:
5322 db_printf("[entry_list]");
5323 break;
5324
5325 case VM_MAP_COPY_OBJECT:
5326 db_printf("[object]");
5327 break;
5328
5329 case VM_MAP_COPY_PAGE_LIST:
5330 db_printf("[page_list]");
5331 break;
5332
5333 default:
5334 db_printf("[bad type]");
5335 break;
5336 }
5337 db_printf(", offset=0x%x", copy->offset);
5338 db_printf(", size=0x%x\n", copy->size);
5339
5340 switch (copy->type) {
5341 case VM_MAP_COPY_ENTRY_LIST:
5342 /* XXX add stuff here */
5343 break;
5344
5345 case VM_MAP_COPY_OBJECT:
5346 db_iprintf("object=0x%x\n", copy->cpy_object);
5347 break;
5348
5349 case VM_MAP_COPY_PAGE_LIST:
5350 db_iprintf("npages=%d", copy->cpy_npages);
5351 db_printf(", cont=%x", copy->cpy_cont);
5352 db_printf(", cont_args=%x\n", copy->cpy_cont_args);
5353 if (copy->cpy_npages < 0) {
5354 npages = 0;
5355 } else if (copy->cpy_npages > VM_MAP_COPY_PAGE_LIST_MAX) {
5356 npages = VM_MAP_COPY_PAGE_LIST_MAX;
5357 } else {
5358 npages = copy->cpy_npages;
5359 }
5360 db_iprintf("copy->cpy_page_list[0..%d] = {", npages);
5361 for (i = 0; i < npages - 1; i++) {
5362 db_printf("0x%x, ", copy->cpy_page_list[i]);
5363 }
5364 if (npages > 0) {
5365 db_printf("0x%x", copy->cpy_page_list[npages - 1]);
5366 }
5367 db_printf("}\n");
5368 break;
5369 }
5370
5371 db_indent -=2;
5372 }
5373 #endif /* MACH_KDB */
5374
5375 #if NORMA_IPC
5376 /*
5377 * This should one day be eliminated;
5378 * we should always construct the right flavor of copy object
5379 * the first time. Troublesome areas include vm_read, where vm_map_copyin
5380 * is called without knowing whom the copy object is for.
5381 * There are also situations where we do want a lazy data structure
5382 * even if we are sending to a remote port...
5383 */
5384
5385 /*
5386 * Convert a copy to a page list. The copy argument is in/out
5387 * because we probably have to allocate a new vm_map_copy structure.
5388 * We take responsibility for discarding the old structure and
5389 * use a continuation to do so. Postponing this discard ensures
5390 * that the objects containing the pages we've marked busy will stick
5391 * around.
5392 */
5393 kern_return_t
5394 vm_map_convert_to_page_list(
5395 vm_map_copy_t *caller_copy)
5396 {
5397 vm_map_entry_t entry, next_entry;
5398 vm_offset_t va;
5399 vm_offset_t offset;
5400 vm_object_t object;
5401 kern_return_t result;
5402 vm_map_copy_t copy, new_copy;
5403 int i, num_pages = 0;
5404
5405 zone_t entry_zone;
5406
5407 copy = *caller_copy;
5408
5409 /*
5410 * We may not have to do anything,
5411 * or may not be able to do anything.
5412 */
5413 if (copy == VM_MAP_COPY_NULL || copy->type == VM_MAP_COPY_PAGE_LIST) {
5414 return KERN_SUCCESS;
5415 }
5416 if (copy->type == VM_MAP_COPY_OBJECT) {
5417 return vm_map_convert_to_page_list_from_object(caller_copy);
5418 }
5419 if (copy->type != VM_MAP_COPY_ENTRY_LIST) {
5420 panic("vm_map_convert_to_page_list: copy type %d!\n",
5421 copy->type);
5422 }
5423
5424 /*
5425 * Allocate the new copy. Set its continuation to
5426 * discard the old one.
5427 */
5428 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5429 new_copy->type = VM_MAP_COPY_PAGE_LIST;
5430 new_copy->cpy_npages = 0;
5431 new_copy->offset = copy->offset;
5432 new_copy->size = copy->size;
5433 new_copy->cpy_cont = vm_map_copy_discard_cont;
5434 new_copy->cpy_cont_args = (void *) copy;
5435
5436 /*
5437 * Iterate over entries.
5438 */
5439 for (entry = vm_map_copy_first_entry(copy);
5440 entry != vm_map_copy_to_entry(copy);
5441 entry = entry->vme_next) {
5442
5443 object = entry->object.vm_object;
5444 offset = entry->offset;
5445 /*
5446 * Iterate over pages.
5447 */
5448 for (va = entry->vme_start;
5449 va < entry->vme_end;
5450 va += PAGE_SIZE, offset += PAGE_SIZE) {
5451
5452 vm_page_t m;
5453
5454 if (new_copy->cpy_npages == VM_MAP_COPY_PAGE_LIST_MAX) {
5455 /*
5456 * What a mess. We need a continuation
5457 * to do the page list, but also one
5458 * to discard the old copy. The right
5459 * thing to do is probably to copy
5460 * out the old copy into the kernel
5461 * map (or some temporary task holding
5462 * map if we're paranoid about large
5463 * copies), and then copyin the page
5464 * list that we really wanted with
5465 * src_destroy. LATER.
5466 */
5467 panic("vm_map_convert_to_page_list: num\n");
5468 }
5469
5470 /*
5471 * Try to find the page of data.
5472 */
5473 vm_object_lock(object);
5474 vm_object_paging_begin(object);
5475 if (((m = vm_page_lookup(object, offset)) !=
5476 VM_PAGE_NULL) && !m->busy && !m->fictitious &&
5477 !m->absent && !m->error) {
5478
5479 /*
5480 * This is the page. Mark it busy
5481 * and keep the paging reference on
5482 * the object whilst we do our thing.
5483 */
5484 m->busy = TRUE;
5485
5486 /*
5487 * Also write-protect the page, so
5488 * that the map`s owner cannot change
5489 * the data. The busy bit will prevent
5490 * faults on the page from succeeding
5491 * until the copy is released; after
5492 * that, the page can be re-entered
5493 * as writable, since we didn`t alter
5494 * the map entry. This scheme is a
5495 * cheap copy-on-write.
5496 *
5497 * Don`t forget the protection and
5498 * the page_lock value!
5499 */
5500
5501 pmap_page_protect(m->phys_addr,
5502 entry->protection
5503 & ~m->page_lock
5504 & ~VM_PROT_WRITE);
5505
5506 }
5507 else {
5508 vm_prot_t result_prot;
5509 vm_page_t top_page;
5510 kern_return_t kr;
5511
5512 retry:
5513 result_prot = VM_PROT_READ;
5514
5515 kr = vm_fault_page(object, offset,
5516 VM_PROT_READ, FALSE, FALSE,
5517 &result_prot, &m, &top_page,
5518 FALSE, CONTINUE_NULL);
5519 if (kr == VM_FAULT_MEMORY_SHORTAGE) {
5520 VM_PAGE_WAIT(CONTINUE_NULL);
5521 vm_object_lock(object);
5522 vm_object_paging_begin(object);
5523 goto retry;
5524 }
5525 if (kr != VM_FAULT_SUCCESS) {
5526 /* XXX what about data_error? */
5527 vm_object_lock(object);
5528 vm_object_paging_begin(object);
5529 goto retry;
5530 }
5531 if (top_page != VM_PAGE_NULL) {
5532 vm_object_lock(object);
5533 VM_PAGE_FREE(top_page);
5534 vm_object_paging_end(object);
5535 vm_object_unlock(object);
5536 }
5537 }
5538 assert(m);
5539 m->busy = TRUE;
5540 new_copy->cpy_page_list[new_copy->cpy_npages++] = m;
5541 vm_object_unlock(object);
5542 }
5543 }
5544
5545 *caller_copy = new_copy;
5546 return KERN_SUCCESS;
5547 }
5548
5549 kern_return_t
5550 vm_map_convert_to_page_list_from_object(
5551 vm_map_copy_t *caller_copy)
5552 {
5553 vm_object_t object;
5554 vm_offset_t offset;
5555 vm_map_copy_t copy, new_copy;
5556
5557 copy = *caller_copy;
5558 assert(copy->type == VM_MAP_COPY_OBJECT);
5559 object = copy->cpy_object;
5560 assert(object->size == round_page(object->size));
5561
5562 /*
5563 * Allocate the new copy. Set its continuation to
5564 * discard the old one.
5565 */
5566 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
5567 new_copy->type = VM_MAP_COPY_PAGE_LIST;
5568 new_copy->cpy_npages = 0;
5569 new_copy->offset = copy->offset;
5570 new_copy->size = copy->size;
5571 new_copy->cpy_cont = vm_map_copy_discard_cont;
5572 new_copy->cpy_cont_args = (void *) copy;
5573
5574 /*
5575 * XXX memory_object_lock_request can probably bust this
5576 * XXX See continuation comment in previous routine for solution.
5577 */
5578 assert(object->size <= VM_MAP_COPY_PAGE_LIST_MAX * PAGE_SIZE);
5579
5580 for (offset = 0; offset < object->size; offset += PAGE_SIZE) {
5581 vm_page_t m;
5582
5583 /*
5584 * Try to find the page of data.
5585 */
5586 vm_object_lock(object);
5587 vm_object_paging_begin(object);
5588 m = vm_page_lookup(object, offset);
5589 if ((m != VM_PAGE_NULL) && !m->busy && !m->fictitious &&
5590 !m->absent && !m->error) {
5591
5592 /*
5593 * This is the page. Mark it busy
5594 * and keep the paging reference on
5595 * the object whilst we do our thing.
5596 */
5597 m->busy = TRUE;
5598 }
5599 else {
5600 vm_prot_t result_prot;
5601 vm_page_t top_page;
5602 kern_return_t kr;
5603
5604 retry:
5605 result_prot = VM_PROT_READ;
5606
5607 kr = vm_fault_page(object, offset,
5608 VM_PROT_READ, FALSE, FALSE,
5609 &result_prot, &m, &top_page,
5610 FALSE, CONTINUE_NULL);
5611 if (kr == VM_FAULT_MEMORY_SHORTAGE) {
5612 VM_PAGE_WAIT(CONTINUE_NULL);
5613 vm_object_lock(object);
5614 vm_object_paging_begin(object);
5615 goto retry;
5616 }
5617 if (kr != VM_FAULT_SUCCESS) {
5618 /* XXX what about data_error? */
5619 vm_object_lock(object);
5620 vm_object_paging_begin(object);
5621 goto retry;
5622 }
5623
5624 if (top_page != VM_PAGE_NULL) {
5625 vm_object_lock(object);
5626 VM_PAGE_FREE(top_page);
5627 vm_object_paging_end(object);
5628 vm_object_unlock(object);
5629 }
5630 }
5631 assert(m);
5632 m->busy = TRUE;
5633 new_copy->cpy_page_list[new_copy->cpy_npages++] = m;
5634 vm_object_unlock(object);
5635 }
5636
5637 *caller_copy = new_copy;
5638 return KERN_SUCCESS;
5639 }
5640
5641 kern_return_t
5642 vm_map_convert_from_page_list(
5643 vm_map_copy_t copy)
5644 {
5645 vm_object_t object;
5646 int i;
5647 vm_map_entry_t new_entry;
5648 vm_page_t *page_list;
5649
5650 /*
5651 * Check type of copy object.
5652 */
5653 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
5654 return KERN_SUCCESS;
5655 }
5656 if (copy->type == VM_MAP_COPY_OBJECT) {
5657 printf("vm_map_convert_from_page_list: COPY_OBJECT?");
5658 return KERN_SUCCESS;
5659 }
5660 if (copy->type != VM_MAP_COPY_PAGE_LIST) {
5661 panic("vm_map_convert_from_page_list 0x%x %d",
5662 copy,
5663 copy->type);
5664 }
5665
5666 /*
5667 * Make sure the pages are loose. This may be
5668 * a "Can't Happen", but just to be safe ...
5669 */
5670 page_list = ©->cpy_page_list[0];
5671 if ((*page_list)->tabled)
5672 vm_map_copy_steal_pages(copy);
5673
5674 /*
5675 * Create object, and stuff pages into it.
5676 */
5677 object = vm_object_allocate(copy->cpy_npages);
5678 for (i = 0; i < copy->cpy_npages; i++) {
5679 register vm_page_t m = *page_list++;
5680 vm_page_insert(m, object, i * PAGE_SIZE);
5681 m->busy = FALSE;
5682 m->dirty = TRUE;
5683 vm_page_activate(m);
5684 }
5685
5686 /*
5687 * XXX If this page list contained a continuation, then
5688 * XXX we're screwed. The right thing to do is probably do
5689 * XXX the copyout, and then copyin the entry list we really
5690 * XXX wanted.
5691 */
5692 if (vm_map_copy_has_cont(copy))
5693 panic("convert_from_page_list: continuation");
5694
5695 /*
5696 * Change type of copy object
5697 */
5698 vm_map_copy_first_entry(copy) =
5699 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy);
5700 copy->type = VM_MAP_COPY_ENTRY_LIST;
5701 copy->cpy_hdr.nentries = 0;
5702 copy->cpy_hdr.entries_pageable = TRUE;
5703
5704 /*
5705 * Allocate and initialize an entry for object
5706 */
5707 new_entry = vm_map_copy_entry_create(copy);
5708 new_entry->vme_start = trunc_page(copy->offset);
5709 new_entry->vme_end = round_page(copy->offset + copy->size);
5710 new_entry->object.vm_object = object;
5711 new_entry->offset = 0;
5712 new_entry->is_shared = FALSE;
5713 new_entry->is_sub_map = FALSE;
5714 new_entry->needs_copy = FALSE;
5715 new_entry->protection = VM_PROT_DEFAULT;
5716 new_entry->max_protection = VM_PROT_ALL;
5717 new_entry->inheritance = VM_INHERIT_DEFAULT;
5718 new_entry->wired_count = 0;
5719 new_entry->user_wired_count = 0;
5720 #if NET_ATM
5721 new_entry->projected_on = 0;
5722 #endif /* NET_ATM */
5723
5724 /*
5725 * Insert entry into copy object, and return.
5726 */
5727 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), new_entry);
5728 return KERN_SUCCESS;
5729 }
5730 #endif /* NORMA_IPC */
Cache object: 7f2a0bd8d0d87f0328fe13a3d9a50723
|