FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_map.c


    1 /*
    2  * Copyright (c) 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * The Mach Operating System project at Carnegie-Mellon University.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. All advertising materials mentioning features or use of this software
   17  *    must display the following acknowledgement:
   18  *      This product includes software developed by the University of
   19  *      California, Berkeley and its contributors.
   20  * 4. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      from: @(#)vm_map.c      8.3 (Berkeley) 1/12/94
   37  *
   38  *
   39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   40  * All rights reserved.
   41  *
   42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
   43  *
   44  * Permission to use, copy, modify and distribute this software and
   45  * its documentation is hereby granted, provided that both the copyright
   46  * notice and this permission notice appear in all copies of the
   47  * software, derivative works or modified versions, and any portions
   48  * thereof, and that both notices appear in supporting documentation.
   49  *
   50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   53  *
   54  * Carnegie Mellon requests users of this software to return to
   55  *
   56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   57  *  School of Computer Science
   58  *  Carnegie Mellon University
   59  *  Pittsburgh PA 15213-3890
   60  *
   61  * any improvements or extensions that they make and grant Carnegie the
   62  * rights to redistribute these changes.
   63  *
   64  * $FreeBSD: releng/5.1/sys/vm/vm_map.c 114317 2003-04-30 19:18:09Z alc $
   65  */
   66 
   67 /*
   68  *      Virtual memory mapping module.
   69  */
   70 
   71 #include <sys/param.h>
   72 #include <sys/systm.h>
   73 #include <sys/ktr.h>
   74 #include <sys/lock.h>
   75 #include <sys/mutex.h>
   76 #include <sys/proc.h>
   77 #include <sys/vmmeter.h>
   78 #include <sys/mman.h>
   79 #include <sys/vnode.h>
   80 #include <sys/resourcevar.h>
   81 #include <sys/sysent.h>
   82 #include <sys/shm.h>
   83 
   84 #include <vm/vm.h>
   85 #include <vm/vm_param.h>
   86 #include <vm/pmap.h>
   87 #include <vm/vm_map.h>
   88 #include <vm/vm_page.h>
   89 #include <vm/vm_object.h>
   90 #include <vm/vm_pager.h>
   91 #include <vm/vm_kern.h>
   92 #include <vm/vm_extern.h>
   93 #include <vm/swap_pager.h>
   94 #include <vm/uma.h>
   95 
   96 /*
   97  *      Virtual memory maps provide for the mapping, protection,
   98  *      and sharing of virtual memory objects.  In addition,
   99  *      this module provides for an efficient virtual copy of
  100  *      memory from one map to another.
  101  *
  102  *      Synchronization is required prior to most operations.
  103  *
  104  *      Maps consist of an ordered doubly-linked list of simple entries;
  105  *      a splay tree and a first-free hint are used to speed up lookups.
  106  *
  107  *      Since portions of maps are specified by start/end addresses,
  108  *      which may not align with existing map entries, all
  109  *      routines merely "clip" entries to these start/end values.
  110  *      [That is, an entry is split into two, bordering at a
  111  *      start or end value.]  Note that these clippings may not
  112  *      always be necessary (as the two resulting entries are then
  113  *      not changed); however, the clipping is done for convenience.
  114  *
  115  *      As mentioned above, virtual copy operations are performed
  116  *      by copying VM object references from one map to
  117  *      another, and then marking both regions as copy-on-write.
  118  */
  119 
  120 /*
  121  *      vm_map_startup:
  122  *
  123  *      Initialize the vm_map module.  Must be called before
  124  *      any other vm_map routines.
  125  *
  126  *      Map and entry structures are allocated from the general
  127  *      purpose memory pool with some exceptions:
  128  *
  129  *      - The kernel map and kmem submap are allocated statically.
  130  *      - Kernel map entries are allocated out of a static pool.
  131  *
  132  *      These restrictions are necessary since malloc() uses the
  133  *      maps and requires map entries.
  134  */
  135 
  136 static struct mtx map_sleep_mtx;
  137 static uma_zone_t mapentzone;
  138 static uma_zone_t kmapentzone;
  139 static uma_zone_t mapzone;
  140 static uma_zone_t vmspace_zone;
  141 static struct vm_object kmapentobj;
  142 static void vmspace_zinit(void *mem, int size);
  143 static void vmspace_zfini(void *mem, int size);
  144 static void vm_map_zinit(void *mem, int size);
  145 static void vm_map_zfini(void *mem, int size);
  146 static void _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max);
  147 
  148 #ifdef INVARIANTS
  149 static void vm_map_zdtor(void *mem, int size, void *arg);
  150 static void vmspace_zdtor(void *mem, int size, void *arg);
  151 #endif
  152 
  153 void
  154 vm_map_startup(void)
  155 {
  156         mtx_init(&map_sleep_mtx, "vm map sleep mutex", NULL, MTX_DEF);
  157         mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL,
  158 #ifdef INVARIANTS
  159             vm_map_zdtor,
  160 #else
  161             NULL,
  162 #endif
  163             vm_map_zinit, vm_map_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
  164         uma_prealloc(mapzone, MAX_KMAP);
  165         kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry), 
  166             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
  167             UMA_ZONE_MTXCLASS | UMA_ZONE_VM);
  168         uma_prealloc(kmapentzone, MAX_KMAPENT);
  169         mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry), 
  170             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
  171         uma_prealloc(mapentzone, MAX_MAPENT);
  172 }
  173 
  174 static void
  175 vmspace_zfini(void *mem, int size)
  176 {
  177         struct vmspace *vm;
  178 
  179         vm = (struct vmspace *)mem;
  180 
  181         vm_map_zfini(&vm->vm_map, sizeof(vm->vm_map));
  182 }
  183 
  184 static void
  185 vmspace_zinit(void *mem, int size)
  186 {
  187         struct vmspace *vm;
  188 
  189         vm = (struct vmspace *)mem;
  190 
  191         vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map));
  192 }
  193 
  194 static void
  195 vm_map_zfini(void *mem, int size)
  196 {
  197         vm_map_t map;
  198 
  199         map = (vm_map_t)mem;
  200         mtx_destroy(&map->system_mtx);
  201         lockdestroy(&map->lock);
  202 }
  203 
  204 static void
  205 vm_map_zinit(void *mem, int size)
  206 {
  207         vm_map_t map;
  208 
  209         map = (vm_map_t)mem;
  210         map->nentries = 0;
  211         map->size = 0;
  212         map->infork = 0;
  213         mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK);
  214         lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
  215 }
  216 
  217 #ifdef INVARIANTS
  218 static void
  219 vmspace_zdtor(void *mem, int size, void *arg)
  220 {
  221         struct vmspace *vm;
  222 
  223         vm = (struct vmspace *)mem;
  224 
  225         vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg);
  226 }
  227 static void
  228 vm_map_zdtor(void *mem, int size, void *arg)
  229 {
  230         vm_map_t map;
  231 
  232         map = (vm_map_t)mem;
  233         KASSERT(map->nentries == 0,
  234             ("map %p nentries == %d on free.", 
  235             map, map->nentries));
  236         KASSERT(map->size == 0,
  237             ("map %p size == %lu on free.",
  238             map, (unsigned long)map->size));
  239         KASSERT(map->infork == 0,
  240             ("map %p infork == %d on free.",
  241             map, map->infork));
  242 }
  243 #endif  /* INVARIANTS */
  244 
  245 /*
  246  * Allocate a vmspace structure, including a vm_map and pmap,
  247  * and initialize those structures.  The refcnt is set to 1.
  248  * The remaining fields must be initialized by the caller.
  249  */
  250 struct vmspace *
  251 vmspace_alloc(min, max)
  252         vm_offset_t min, max;
  253 {
  254         struct vmspace *vm;
  255 
  256         GIANT_REQUIRED;
  257         vm = uma_zalloc(vmspace_zone, M_WAITOK);
  258         CTR1(KTR_VM, "vmspace_alloc: %p", vm);
  259         _vm_map_init(&vm->vm_map, min, max);
  260         pmap_pinit(vmspace_pmap(vm));
  261         vm->vm_map.pmap = vmspace_pmap(vm);             /* XXX */
  262         vm->vm_refcnt = 1;
  263         vm->vm_shm = NULL;
  264         vm->vm_exitingcnt = 0;
  265         return (vm);
  266 }
  267 
  268 void
  269 vm_init2(void) 
  270 {
  271         uma_zone_set_obj(kmapentzone, &kmapentobj, lmin(cnt.v_page_count,
  272             (VM_MAX_KERNEL_ADDRESS - KERNBASE) / PAGE_SIZE) / 8);
  273         vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
  274 #ifdef INVARIANTS
  275             vmspace_zdtor,
  276 #else
  277             NULL,
  278 #endif
  279             vmspace_zinit, vmspace_zfini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
  280         pmap_init2();
  281 }
  282 
  283 static __inline void
  284 vmspace_dofree(struct vmspace *vm)
  285 {
  286         CTR1(KTR_VM, "vmspace_free: %p", vm);
  287 
  288         /*
  289          * Make sure any SysV shm is freed, it might not have been in
  290          * exit1().
  291          */
  292         shmexit(vm);
  293 
  294         /*
  295          * Lock the map, to wait out all other references to it.
  296          * Delete all of the mappings and pages they hold, then call
  297          * the pmap module to reclaim anything left.
  298          */
  299         vm_map_lock(&vm->vm_map);
  300         (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
  301             vm->vm_map.max_offset);
  302         vm_map_unlock(&vm->vm_map);
  303 
  304         pmap_release(vmspace_pmap(vm));
  305         uma_zfree(vmspace_zone, vm);
  306 }
  307 
  308 void
  309 vmspace_free(struct vmspace *vm)
  310 {
  311         GIANT_REQUIRED;
  312 
  313         if (vm->vm_refcnt == 0)
  314                 panic("vmspace_free: attempt to free already freed vmspace");
  315 
  316         if (--vm->vm_refcnt == 0 && vm->vm_exitingcnt == 0)
  317                 vmspace_dofree(vm);
  318 }
  319 
  320 void
  321 vmspace_exitfree(struct proc *p)
  322 {
  323         struct vmspace *vm;
  324 
  325         GIANT_REQUIRED;
  326         vm = p->p_vmspace;
  327         p->p_vmspace = NULL;
  328 
  329         /*
  330          * cleanup by parent process wait()ing on exiting child.  vm_refcnt
  331          * may not be 0 (e.g. fork() and child exits without exec()ing).
  332          * exitingcnt may increment above 0 and drop back down to zero
  333          * several times while vm_refcnt is held non-zero.  vm_refcnt
  334          * may also increment above 0 and drop back down to zero several 
  335          * times while vm_exitingcnt is held non-zero.
  336          * 
  337          * The last wait on the exiting child's vmspace will clean up 
  338          * the remainder of the vmspace.
  339          */
  340         if (--vm->vm_exitingcnt == 0 && vm->vm_refcnt == 0)
  341                 vmspace_dofree(vm);
  342 }
  343 
  344 /*
  345  * vmspace_swap_count() - count the approximate swap usage in pages for a
  346  *                        vmspace.
  347  *
  348  *      The map must be locked.
  349  *
  350  *      Swap usage is determined by taking the proportional swap used by
  351  *      VM objects backing the VM map.  To make up for fractional losses,
  352  *      if the VM object has any swap use at all the associated map entries
  353  *      count for at least 1 swap page.
  354  */
  355 int
  356 vmspace_swap_count(struct vmspace *vmspace)
  357 {
  358         vm_map_t map = &vmspace->vm_map;
  359         vm_map_entry_t cur;
  360         int count = 0;
  361 
  362         for (cur = map->header.next; cur != &map->header; cur = cur->next) {
  363                 vm_object_t object;
  364 
  365                 if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
  366                     (object = cur->object.vm_object) != NULL) {
  367                         VM_OBJECT_LOCK(object);
  368                         if (object->type == OBJT_SWAP &&
  369                             object->un_pager.swp.swp_bcount != 0) {
  370                                 int n = (cur->end - cur->start) / PAGE_SIZE;
  371 
  372                                 count += object->un_pager.swp.swp_bcount *
  373                                     SWAP_META_PAGES * n / object->size + 1;
  374                         }
  375                         VM_OBJECT_UNLOCK(object);
  376                 }
  377         }
  378         return (count);
  379 }
  380 
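/*
 * Illustrative note, not part of the original file: assuming for the sake
 * of the arithmetic that SWAP_META_PAGES were 16, an entry spanning n = 32
 * pages of a 128-page object with swp_bcount = 4 would add
 * 4 * 16 * 32 / 128 + 1 = 17 pages to the count; the trailing "+ 1" is what
 * guarantees that any object with swap use charges at least one page.
 */
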
  381 void
  382 _vm_map_lock(vm_map_t map, const char *file, int line)
  383 {
  384         int error;
  385 
  386         if (map->system_map)
  387                 _mtx_lock_flags(&map->system_mtx, 0, file, line);
  388         else {
  389                 error = lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread);
  390                 KASSERT(error == 0, ("%s: failed to get lock", __func__));
  391         }
  392         map->timestamp++;
  393 }
  394 
  395 void
  396 _vm_map_unlock(vm_map_t map, const char *file, int line)
  397 {
  398 
  399         if (map->system_map)
  400                 _mtx_unlock_flags(&map->system_mtx, 0, file, line);
  401         else
  402                 lockmgr(&map->lock, LK_RELEASE, NULL, curthread);
  403 }
  404 
  405 void
  406 _vm_map_lock_read(vm_map_t map, const char *file, int line)
  407 {
  408         int error;
  409 
  410         if (map->system_map)
  411                 _mtx_lock_flags(&map->system_mtx, 0, file, line);
  412         else {
  413                 error = lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread);
  414                 KASSERT(error == 0, ("%s: failed to get lock", __func__));
  415         }
  416 }
  417 
  418 void
  419 _vm_map_unlock_read(vm_map_t map, const char *file, int line)
  420 {
  421 
  422         if (map->system_map)
  423                 _mtx_unlock_flags(&map->system_mtx, 0, file, line);
  424         else
  425                 lockmgr(&map->lock, LK_RELEASE, NULL, curthread);
  426 }
  427 
  428 int
  429 _vm_map_trylock(vm_map_t map, const char *file, int line)
  430 {
  431         int error;
  432 
  433         error = map->system_map ?
  434             !_mtx_trylock(&map->system_mtx, 0, file, line) :
  435             lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT, NULL, curthread);
  436         if (error == 0)
  437                 map->timestamp++;
  438         return (error == 0);
  439 }
  440 
  441 int
  442 _vm_map_trylock_read(vm_map_t map, const char *file, int line)
  443 {
  444         int error;
  445 
  446         error = map->system_map ?
  447             !_mtx_trylock(&map->system_mtx, 0, file, line) :
  448             lockmgr(&map->lock, LK_EXCLUSIVE | LK_NOWAIT, NULL, curthread);
  449         return (error == 0);
  450 }
  451 
  452 int
  453 _vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
  454 {
  455 
  456         if (map->system_map) {
  457 #ifdef INVARIANTS
  458                 _mtx_assert(&map->system_mtx, MA_OWNED, file, line);
  459 #endif
  460         } else
  461                 KASSERT(lockstatus(&map->lock, curthread) == LK_EXCLUSIVE,
  462                     ("%s: lock not held", __func__));
  463         map->timestamp++;
  464         return (0);
  465 }
  466 
  467 void
  468 _vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
  469 {
  470 
  471         if (map->system_map) {
  472 #ifdef INVARIANTS
  473                 _mtx_assert(&map->system_mtx, MA_OWNED, file, line);
  474 #endif
  475         } else
  476                 KASSERT(lockstatus(&map->lock, curthread) == LK_EXCLUSIVE,
  477                     ("%s: lock not held", __func__));
  478 }
  479 
  480 /*
  481  *      vm_map_unlock_and_wait:
  482  */
  483 int
  484 vm_map_unlock_and_wait(vm_map_t map, boolean_t user_wait)
  485 {
  486 
  487         mtx_lock(&map_sleep_mtx);
  488         vm_map_unlock(map);
  489         return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps", 0));
  490 }
  491 
  492 /*
  493  *      vm_map_wakeup:
  494  */
  495 void
  496 vm_map_wakeup(vm_map_t map)
  497 {
  498 
  499         /*
  500          * Acquire and release map_sleep_mtx to prevent a wakeup()
  501          * from being performed (and lost) between the vm_map_unlock()
  502          * and the msleep() in vm_map_unlock_and_wait().
  503          */
  504         mtx_lock(&map_sleep_mtx);
  505         mtx_unlock(&map_sleep_mtx);
  506         wakeup(&map->root);
  507 }
  508 
  509 long
  510 vmspace_resident_count(struct vmspace *vmspace)
  511 {
  512         return pmap_resident_count(vmspace_pmap(vmspace));
  513 }
  514 
  515 /*
  516  *      vm_map_create:
  517  *
  518  *      Creates and returns a new empty VM map with
  519  *      the given physical map structure, and having
  520  *      the given lower and upper address bounds.
  521  */
  522 vm_map_t
  523 vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
  524 {
  525         vm_map_t result;
  526 
  527         result = uma_zalloc(mapzone, M_WAITOK);
  528         CTR1(KTR_VM, "vm_map_create: %p", result);
  529         _vm_map_init(result, min, max);
  530         result->pmap = pmap;
  531         return (result);
  532 }
  533 
  534 /*
  535  * Initialize an existing vm_map structure
  536  * such as that in the vmspace structure.
  537  * The pmap is set elsewhere.
  538  */
  539 static void
  540 _vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
  541 {
  542 
  543         map->header.next = map->header.prev = &map->header;
  544         map->needs_wakeup = FALSE;
  545         map->system_map = 0;
  546         map->min_offset = min;
  547         map->max_offset = max;
  548         map->first_free = &map->header;
  549         map->root = NULL;
  550         map->timestamp = 0;
  551 }
  552 
  553 void
  554 vm_map_init(vm_map_t map, vm_offset_t min, vm_offset_t max)
  555 {
  556         _vm_map_init(map, min, max);
  557         mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK);
  558         lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
  559 }
  560 
  561 /*
  562  *      vm_map_entry_dispose:   [ internal use only ]
  563  *
  564  *      Inverse of vm_map_entry_create.
  565  */
  566 static void
  567 vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
  568 {
  569         uma_zfree(map->system_map ? kmapentzone : mapentzone, entry);
  570 }
  571 
  572 /*
  573  *      vm_map_entry_create:    [ internal use only ]
  574  *
  575  *      Allocates a VM map entry for insertion.
  576  *      No entry fields are filled in.
  577  */
  578 static vm_map_entry_t
  579 vm_map_entry_create(vm_map_t map)
  580 {
  581         vm_map_entry_t new_entry;
  582 
  583         if (map->system_map)
  584                 new_entry = uma_zalloc(kmapentzone, M_NOWAIT);
  585         else
  586                 new_entry = uma_zalloc(mapentzone, M_WAITOK);
  587         if (new_entry == NULL)
  588                 panic("vm_map_entry_create: kernel resources exhausted");
  589         return (new_entry);
  590 }
  591 
  592 /*
  593  *      vm_map_entry_set_behavior:
  594  *
  595  *      Set the expected access behavior, either normal, random, or
  596  *      sequential.
  597  */
  598 static __inline void
  599 vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior)
  600 {
  601         entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
  602             (behavior & MAP_ENTRY_BEHAV_MASK);
  603 }
  604 
  605 /*
  606  *      vm_map_entry_splay:
  607  *
  608  *      Implements Sleator and Tarjan's top-down splay algorithm.  Returns
  609  *      the vm_map_entry containing the given address.  If, however, that
  610  *      address is not found in the vm_map, returns a vm_map_entry that is
  611  *      adjacent to the address, coming before or after it.
  612  */
  613 static vm_map_entry_t
  614 vm_map_entry_splay(vm_offset_t address, vm_map_entry_t root)
  615 {
  616         struct vm_map_entry dummy;
  617         vm_map_entry_t lefttreemax, righttreemin, y;
  618 
  619         if (root == NULL)
  620                 return (root);
  621         lefttreemax = righttreemin = &dummy;
  622         for (;; root = y) {
  623                 if (address < root->start) {
  624                         if ((y = root->left) == NULL)
  625                                 break;
  626                         if (address < y->start) {
  627                                 /* Rotate right. */
  628                                 root->left = y->right;
  629                                 y->right = root;
  630                                 root = y;
  631                                 if ((y = root->left) == NULL)
  632                                         break;
  633                         }
  634                         /* Link into the new root's right tree. */
  635                         righttreemin->left = root;
  636                         righttreemin = root;
  637                 } else if (address >= root->end) {
  638                         if ((y = root->right) == NULL)
  639                                 break;
  640                         if (address >= y->end) {
  641                                 /* Rotate left. */
  642                                 root->right = y->left;
  643                                 y->left = root;
  644                                 root = y;
  645                                 if ((y = root->right) == NULL)
  646                                         break;
  647                         }
  648                         /* Link into the new root's left tree. */
  649                         lefttreemax->right = root;
  650                         lefttreemax = root;
  651                 } else
  652                         break;
  653         }
  654         /* Assemble the new root. */
  655         lefttreemax->right = root->left;
  656         righttreemin->left = root->right;
  657         root->left = dummy.right;
  658         root->right = dummy.left;
  659         return (root);
  660 }
  661 
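/*
 * Illustrative userland sketch, not part of the original file: the same
 * Sleator/Tarjan top-down splay used by vm_map_entry_splay() above, reduced
 * to single integer keys instead of [start, end) address ranges.  The names
 * "node" and "splay" are hypothetical; the sketch only needs NULL (from the
 * headers above, or <stddef.h> in userland) and shows the bare
 * rotate-and-link steps plus the final reassembly around the dummy node.
 */
struct node {
	int key;
	struct node *left, *right;
};

static struct node *
splay(int key, struct node *root)
{
	struct node dummy = { 0, NULL, NULL };
	struct node *lefttreemax, *righttreemin, *y;

	if (root == NULL)
		return (NULL);
	lefttreemax = righttreemin = &dummy;
	for (;; root = y) {
		if (key < root->key) {
			if ((y = root->left) == NULL)
				break;
			if (key < y->key) {
				/* Rotate right. */
				root->left = y->right;
				y->right = root;
				root = y;
				if ((y = root->left) == NULL)
					break;
			}
			/* Link into the right tree. */
			righttreemin->left = root;
			righttreemin = root;
		} else if (key > root->key) {
			if ((y = root->right) == NULL)
				break;
			if (key > y->key) {
				/* Rotate left. */
				root->right = y->left;
				y->left = root;
				root = y;
				if ((y = root->right) == NULL)
					break;
			}
			/* Link into the left tree. */
			lefttreemax->right = root;
			lefttreemax = root;
		} else
			break;		/* Exact match; root is the answer. */
	}
	/* Reassemble: hang the accumulated subtrees off the new root. */
	lefttreemax->right = root->left;
	righttreemin->left = root->right;
	root->left = dummy.right;
	root->right = dummy.left;
	return (root);
}
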
  662 /*
  663  *      vm_map_entry_{un,}link:
  664  *
  665  *      Insert/remove entries from maps.
  666  */
  667 static void
  668 vm_map_entry_link(vm_map_t map,
  669                   vm_map_entry_t after_where,
  670                   vm_map_entry_t entry)
  671 {
  672 
  673         CTR4(KTR_VM,
  674             "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
  675             map->nentries, entry, after_where);
  676         map->nentries++;
  677         entry->prev = after_where;
  678         entry->next = after_where->next;
  679         entry->next->prev = entry;
  680         after_where->next = entry;
  681 
  682         if (after_where != &map->header) {
  683                 if (after_where != map->root)
  684                         vm_map_entry_splay(after_where->start, map->root);
  685                 entry->right = after_where->right;
  686                 entry->left = after_where;
  687                 after_where->right = NULL;
  688         } else {
  689                 entry->right = map->root;
  690                 entry->left = NULL;
  691         }
  692         map->root = entry;
  693 }
  694 
  695 static void
  696 vm_map_entry_unlink(vm_map_t map,
  697                     vm_map_entry_t entry)
  698 {
  699         vm_map_entry_t next, prev, root;
  700 
  701         if (entry != map->root)
  702                 vm_map_entry_splay(entry->start, map->root);
  703         if (entry->left == NULL)
  704                 root = entry->right;
  705         else {
  706                 root = vm_map_entry_splay(entry->start, entry->left);
  707                 root->right = entry->right;
  708         }
  709         map->root = root;
  710 
  711         prev = entry->prev;
  712         next = entry->next;
  713         next->prev = prev;
  714         prev->next = next;
  715         map->nentries--;
  716         CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
  717             map->nentries, entry);
  718 }
  719 
  720 /*
  721  *      vm_map_lookup_entry:    [ internal use only ]
  722  *
  723  *      Finds the map entry containing (or
  724  *      immediately preceding) the specified address
  725  *      in the given map; the entry is returned
  726  *      in the "entry" parameter.  The boolean
  727  *      result indicates whether the address is
  728  *      actually contained in the map.
  729  */
  730 boolean_t
  731 vm_map_lookup_entry(
  732         vm_map_t map,
  733         vm_offset_t address,
  734         vm_map_entry_t *entry)  /* OUT */
  735 {
  736         vm_map_entry_t cur;
  737 
  738         cur = vm_map_entry_splay(address, map->root);
  739         if (cur == NULL)
  740                 *entry = &map->header;
  741         else {
  742                 map->root = cur;
  743 
  744                 if (address >= cur->start) {
  745                         *entry = cur;
  746                         if (cur->end > address)
  747                                 return (TRUE);
  748                 } else
  749                         *entry = cur->prev;
  750         }
  751         return (FALSE);
  752 }
  753 
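/*
 * Illustrative note, not part of the original file: the common caller
 * pattern, used by vm_map_submap(), vm_map_protect() and vm_map_madvise()
 * below, is to look up the start address and clip so that the first
 * affected entry begins exactly at "start":
 *
 *	if (vm_map_lookup_entry(map, start, &entry))
 *		vm_map_clip_start(map, entry, start);
 *	else
 *		entry = entry->next;
 */
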
  754 /*
  755  *      vm_map_insert:
  756  *
  757  *      Inserts the given whole VM object into the target
  758  *      map at the specified address range.  The object's
  759  *      size should match that of the address range.
  760  *
  761  *      Requires that the map be locked, and leaves it so.
  762  *
  763  *      If object is non-NULL, ref count must be bumped by caller
  764  *      prior to making call to account for the new entry.
  765  */
  766 int
  767 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
  768               vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
  769               int cow)
  770 {
  771         vm_map_entry_t new_entry;
  772         vm_map_entry_t prev_entry;
  773         vm_map_entry_t temp_entry;
  774         vm_eflags_t protoeflags;
  775 
  776         /*
  777          * Check that the start and end points are not bogus.
  778          */
  779         if ((start < map->min_offset) || (end > map->max_offset) ||
  780             (start >= end))
  781                 return (KERN_INVALID_ADDRESS);
  782 
  783         /*
  784          * Find the entry prior to the proposed starting address; if it's part
  785          * of an existing entry, this range is bogus.
  786          */
  787         if (vm_map_lookup_entry(map, start, &temp_entry))
  788                 return (KERN_NO_SPACE);
  789 
  790         prev_entry = temp_entry;
  791 
  792         /*
  793          * Assert that the next entry doesn't overlap the end point.
  794          */
  795         if ((prev_entry->next != &map->header) &&
  796             (prev_entry->next->start < end))
  797                 return (KERN_NO_SPACE);
  798 
  799         protoeflags = 0;
  800 
  801         if (cow & MAP_COPY_ON_WRITE)
  802                 protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
  803 
  804         if (cow & MAP_NOFAULT) {
  805                 protoeflags |= MAP_ENTRY_NOFAULT;
  806 
  807                 KASSERT(object == NULL,
  808                         ("vm_map_insert: paradoxical MAP_NOFAULT request"));
  809         }
  810         if (cow & MAP_DISABLE_SYNCER)
  811                 protoeflags |= MAP_ENTRY_NOSYNC;
  812         if (cow & MAP_DISABLE_COREDUMP)
  813                 protoeflags |= MAP_ENTRY_NOCOREDUMP;
  814 
  815         if (object != NULL) {
  816                 /*
  817                  * OBJ_ONEMAPPING must be cleared unless this mapping
  818                  * is trivially proven to be the only mapping for any
  819                  * of the object's pages.  (Object granularity
  820                  * reference counting is insufficient to recognize
  821                  * aliases with precision.) 
  822                  */
  823                 if (object != kmem_object)
  824                         mtx_lock(&Giant);
  825                 VM_OBJECT_LOCK(object);
  826                 if (object->ref_count > 1 || object->shadow_count != 0)
  827                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
  828                 VM_OBJECT_UNLOCK(object);
  829                 if (object != kmem_object)
  830                         mtx_unlock(&Giant);
  831         }
  832         else if ((prev_entry != &map->header) &&
  833                  (prev_entry->eflags == protoeflags) &&
  834                  (prev_entry->end == start) &&
  835                  (prev_entry->wired_count == 0) &&
  836                  ((prev_entry->object.vm_object == NULL) ||
  837                   vm_object_coalesce(prev_entry->object.vm_object,
  838                                      OFF_TO_IDX(prev_entry->offset),
  839                                      (vm_size_t)(prev_entry->end - prev_entry->start),
  840                                      (vm_size_t)(end - prev_entry->end)))) {
  841                 /*
  842                  * We were able to extend the object.  Determine if we
  843                  * can extend the previous map entry to include the 
  844                  * new range as well.
  845                  */
  846                 if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
  847                     (prev_entry->protection == prot) &&
  848                     (prev_entry->max_protection == max)) {
  849                         map->size += (end - prev_entry->end);
  850                         prev_entry->end = end;
  851                         vm_map_simplify_entry(map, prev_entry);
  852                         return (KERN_SUCCESS);
  853                 }
  854 
  855                 /*
  856                  * If we can extend the object but cannot extend the
  857                  * map entry, we have to create a new map entry.  We
  858                  * must bump the ref count on the extended object to
  859                  * account for it.  object may be NULL.
  860                  */
  861                 object = prev_entry->object.vm_object;
  862                 offset = prev_entry->offset +
  863                         (prev_entry->end - prev_entry->start);
  864                 vm_object_reference(object);
  865         }
  866 
  867         /*
  868          * NOTE: if conditionals fail, object can be NULL here.  This occurs
  869          * in things like the buffer map where we manage kva but do not manage
  870          * backing objects.
  871          */
  872 
  873         /*
  874          * Create a new entry
  875          */
  876         new_entry = vm_map_entry_create(map);
  877         new_entry->start = start;
  878         new_entry->end = end;
  879 
  880         new_entry->eflags = protoeflags;
  881         new_entry->object.vm_object = object;
  882         new_entry->offset = offset;
  883         new_entry->avail_ssize = 0;
  884 
  885         new_entry->inheritance = VM_INHERIT_DEFAULT;
  886         new_entry->protection = prot;
  887         new_entry->max_protection = max;
  888         new_entry->wired_count = 0;
  889 
  890         /*
  891          * Insert the new entry into the list
  892          */
  893         vm_map_entry_link(map, prev_entry, new_entry);
  894         map->size += new_entry->end - new_entry->start;
  895 
  896         /*
  897          * Update the free space hint
  898          */
  899         if ((map->first_free == prev_entry) &&
  900             (prev_entry->end >= new_entry->start)) {
  901                 map->first_free = new_entry;
  902         }
  903 
  904 #if 0
  905         /*
  906          * Temporarily removed to avoid MAP_STACK panic, due to
  907          * MAP_STACK being a huge hack.  Will be added back in
  908          * when MAP_STACK (and the user stack mapping) is fixed.
  909          */
  910         /*
  911          * It may be possible to simplify the entry
  912          */
  913         vm_map_simplify_entry(map, new_entry);
  914 #endif
  915 
  916         if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
  917                 mtx_lock(&Giant);
  918                 pmap_object_init_pt(map->pmap, start,
  919                                     object, OFF_TO_IDX(offset), end - start,
  920                                     cow & MAP_PREFAULT_PARTIAL);
  921                 mtx_unlock(&Giant);
  922         }
  923 
  924         return (KERN_SUCCESS);
  925 }
  926 
  927 /*
  928  * Find sufficient space for `length' bytes in the given map, starting at
  929  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
  930  */
  931 int
  932 vm_map_findspace(
  933         vm_map_t map,
  934         vm_offset_t start,
  935         vm_size_t length,
  936         vm_offset_t *addr)
  937 {
  938         vm_map_entry_t entry, next;
  939         vm_offset_t end;
  940 
  941         if (start < map->min_offset)
  942                 start = map->min_offset;
  943         if (start > map->max_offset)
  944                 return (1);
  945 
  946         /*
  947          * Look for the first possible address; if there's already something
  948          * at this address, we have to start after it.
  949          */
  950         if (start == map->min_offset) {
  951                 if ((entry = map->first_free) != &map->header)
  952                         start = entry->end;
  953         } else {
  954                 vm_map_entry_t tmp;
  955 
  956                 if (vm_map_lookup_entry(map, start, &tmp))
  957                         start = tmp->end;
  958                 entry = tmp;
  959         }
  960 
  961         /*
  962          * Look through the rest of the map, trying to fit a new region in the
  963          * gap between existing regions, or after the very last region.
  964          */
  965         for (;; start = (entry = next)->end) {
  966                 /*
  967                  * Find the end of the proposed new region.  Be sure we didn't
  968                  * go beyond the end of the map, or wrap around the address;
  969                  * if so, we lose.  Otherwise, if this is the last entry, or
  970                  * if the proposed new region fits before the next entry, we
  971                  * win.
  972                  */
  973                 end = start + length;
  974                 if (end > map->max_offset || end < start)
  975                         return (1);
  976                 next = entry->next;
  977                 if (next == &map->header || next->start >= end)
  978                         break;
  979         }
  980         *addr = start;
  981         if (map == kernel_map) {
  982                 vm_offset_t ksize;
  983                 if ((ksize = round_page(start + length)) > kernel_vm_end) {
  984                         pmap_growkernel(ksize);
  985                 }
  986         }
  987         return (0);
  988 }
  989 
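/*
 * Illustrative userland sketch, not part of the original file: the same
 * first-fit search that vm_map_findspace() performs over the entry list,
 * restated over a hypothetical sorted array of non-overlapping [start, end)
 * regions.  "struct region", "findspace" and "maxaddr" are made-up names;
 * the function returns 0 and sets *addr on success, or 1 if no gap of
 * "length" bytes fits below "maxaddr".
 */
struct region {
	unsigned long start, end;	/* [start, end), sorted ascending */
};

static int
findspace(const struct region *r, int nregions, unsigned long start,
    unsigned long length, unsigned long maxaddr, unsigned long *addr)
{
	unsigned long end;
	int i;

	for (i = 0; i < nregions; i++) {
		if (r[i].end <= start)		/* region lies below start */
			continue;
		end = start + length;
		if (end > maxaddr || end < start)
			return (1);		/* past the limit or wrapped */
		if (end <= r[i].start)		/* fits in the gap before r[i] */
			break;
		start = r[i].end;		/* else retry after r[i] */
	}
	end = start + length;
	if (end > maxaddr || end < start)	/* check the tail gap too */
		return (1);
	*addr = start;
	return (0);
}
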
  990 /*
  991  *      vm_map_find finds an unallocated region in the target address
  992  *      map with the given length.  The search is defined to be
  993  *      first-fit from the specified address; the region found is
  994  *      returned in the same parameter.
  995  *
  996  *      If object is non-NULL, ref count must be bumped by caller
  997  *      prior to making call to account for the new entry.
  998  */
  999 int
 1000 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
 1001             vm_offset_t *addr,  /* IN/OUT */
 1002             vm_size_t length, boolean_t find_space, vm_prot_t prot,
 1003             vm_prot_t max, int cow)
 1004 {
 1005         vm_offset_t start;
 1006         int result, s = 0;
 1007 
 1008         start = *addr;
 1009 
 1010         if (map == kmem_map)
 1011                 s = splvm();
 1012 
 1013         vm_map_lock(map);
 1014         if (find_space) {
 1015                 if (vm_map_findspace(map, start, length, addr)) {
 1016                         vm_map_unlock(map);
 1017                         if (map == kmem_map)
 1018                                 splx(s);
 1019                         return (KERN_NO_SPACE);
 1020                 }
 1021                 start = *addr;
 1022         }
 1023         result = vm_map_insert(map, object, offset,
 1024                 start, start + length, prot, max, cow);
 1025         vm_map_unlock(map);
 1026 
 1027         if (map == kmem_map)
 1028                 splx(s);
 1029 
 1030         return (result);
 1031 }
 1032 
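/*
 * Illustrative sketch, not part of the original file: a minimal anonymous
 * mapping request that lets vm_map_find() choose the address, with the
 * target map and error handling left to the (hypothetical) caller.  With
 * object == NULL there is no reference count to bump beforehand.
 *
 *	vm_offset_t addr = 0;
 *	int rv;
 *
 *	rv = vm_map_find(map, NULL, 0, &addr, PAGE_SIZE, TRUE,
 *	    VM_PROT_ALL, VM_PROT_ALL, 0);
 *	if (rv != KERN_SUCCESS)
 *		return (rv);
 */
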
 1033 /*
 1034  *      vm_map_simplify_entry:
 1035  *
 1036  *      Simplify the given map entry by merging with either neighbor.  This
 1037  *      routine also has the ability to merge with both neighbors.
 1038  *
 1039  *      The map must be locked.
 1040  *
 1041  *      This routine guarantees that the passed entry remains valid (though
 1042  *      possibly extended).  When merging, this routine may delete one or
 1043  *      both neighbors.
 1044  */
 1045 void
 1046 vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
 1047 {
 1048         vm_map_entry_t next, prev;
 1049         vm_size_t prevsize, esize;
 1050 
 1051         if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP))
 1052                 return;
 1053 
 1054         prev = entry->prev;
 1055         if (prev != &map->header) {
 1056                 prevsize = prev->end - prev->start;
 1057                 if ( (prev->end == entry->start) &&
 1058                      (prev->object.vm_object == entry->object.vm_object) &&
 1059                      (!prev->object.vm_object ||
 1060                         (prev->offset + prevsize == entry->offset)) &&
 1061                      (prev->eflags == entry->eflags) &&
 1062                      (prev->protection == entry->protection) &&
 1063                      (prev->max_protection == entry->max_protection) &&
 1064                      (prev->inheritance == entry->inheritance) &&
 1065                      (prev->wired_count == entry->wired_count)) {
 1066                         if (map->first_free == prev)
 1067                                 map->first_free = entry;
 1068                         vm_map_entry_unlink(map, prev);
 1069                         entry->start = prev->start;
 1070                         entry->offset = prev->offset;
 1071                         if (prev->object.vm_object)
 1072                                 vm_object_deallocate(prev->object.vm_object);
 1073                         vm_map_entry_dispose(map, prev);
 1074                 }
 1075         }
 1076 
 1077         next = entry->next;
 1078         if (next != &map->header) {
 1079                 esize = entry->end - entry->start;
 1080                 if ((entry->end == next->start) &&
 1081                     (next->object.vm_object == entry->object.vm_object) &&
 1082                      (!entry->object.vm_object ||
 1083                         (entry->offset + esize == next->offset)) &&
 1084                     (next->eflags == entry->eflags) &&
 1085                     (next->protection == entry->protection) &&
 1086                     (next->max_protection == entry->max_protection) &&
 1087                     (next->inheritance == entry->inheritance) &&
 1088                     (next->wired_count == entry->wired_count)) {
 1089                         if (map->first_free == next)
 1090                                 map->first_free = entry;
 1091                         vm_map_entry_unlink(map, next);
 1092                         entry->end = next->end;
 1093                         if (next->object.vm_object)
 1094                                 vm_object_deallocate(next->object.vm_object);
 1095                         vm_map_entry_dispose(map, next);
 1096                 }
 1097         }
 1098 }
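
/*
 * Illustrative example, not part of the original file: two hypothetical
 * entries [0x1000, 0x3000) and [0x3000, 0x6000) backed by the same object
 * at offsets 0x0 and 0x2000 collapse into a single entry [0x1000, 0x6000)
 * at offset 0x0, provided the eflags, protections, inheritance and wired
 * counts also match; the absorbed entry's object reference is released and
 * the entry disposed, exactly as in the prev/next cases above.
 */
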
 1099 /*
 1100  *      vm_map_clip_start:      [ internal use only ]
 1101  *
 1102  *      Asserts that the given entry begins at or after
 1103  *      the specified address; if necessary,
 1104  *      it splits the entry into two.
 1105  */
 1106 #define vm_map_clip_start(map, entry, startaddr) \
 1107 { \
 1108         if (startaddr > entry->start) \
 1109                 _vm_map_clip_start(map, entry, startaddr); \
 1110 }
 1111 
 1112 /*
 1113  *      This routine is called only when it is known that
 1114  *      the entry must be split.
 1115  */
 1116 static void
 1117 _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
 1118 {
 1119         vm_map_entry_t new_entry;
 1120 
 1121         /*
 1122          * Split off the front portion -- note that we must insert the new
 1123          * entry BEFORE this one, so that this entry has the specified
 1124          * starting address.
 1125          */
 1126         vm_map_simplify_entry(map, entry);
 1127 
 1128         /*
 1129          * If there is no object backing this entry, we might as well create
 1130          * one now.  If we defer it, an object can get created after the map
 1131          * is clipped, and individual objects will be created for the split-up
 1132          * map.  This is a bit of a hack, but is also about the best place to
 1133          * put this improvement.
 1134          */
 1135         if (entry->object.vm_object == NULL && !map->system_map) {
 1136                 vm_object_t object;
 1137                 object = vm_object_allocate(OBJT_DEFAULT,
 1138                                 atop(entry->end - entry->start));
 1139                 entry->object.vm_object = object;
 1140                 entry->offset = 0;
 1141         }
 1142 
 1143         new_entry = vm_map_entry_create(map);
 1144         *new_entry = *entry;
 1145 
 1146         new_entry->end = start;
 1147         entry->offset += (start - entry->start);
 1148         entry->start = start;
 1149 
 1150         vm_map_entry_link(map, entry->prev, new_entry);
 1151 
 1152         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 1153                 vm_object_reference(new_entry->object.vm_object);
 1154         }
 1155 }
 1156 
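/*
 * Illustrative example, not part of the original file: clipping a
 * hypothetical entry [0x1000, 0x5000) with offset 0x0 at start == 0x3000
 * leaves the original entry as [0x3000, 0x5000) with offset 0x2000 and
 * links a new front entry [0x1000, 0x3000) at offset 0x0 before it, so the
 * entry the caller holds still begins at the requested start address.
 */
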
 1157 /*
 1158  *      vm_map_clip_end:        [ internal use only ]
 1159  *
 1160  *      Asserts that the given entry ends at or before
 1161  *      the specified address; if necessary,
 1162  *      it splits the entry into two.
 1163  */
 1164 #define vm_map_clip_end(map, entry, endaddr) \
 1165 { \
 1166         if ((endaddr) < (entry->end)) \
 1167                 _vm_map_clip_end((map), (entry), (endaddr)); \
 1168 }
 1169 
 1170 /*
 1171  *      This routine is called only when it is known that
 1172  *      the entry must be split.
 1173  */
 1174 static void
 1175 _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
 1176 {
 1177         vm_map_entry_t new_entry;
 1178 
 1179         /*
 1180          * If there is no object backing this entry, we might as well create
 1181          * one now.  If we defer it, an object can get created after the map
 1182          * is clipped, and individual objects will be created for the split-up
 1183          * map.  This is a bit of a hack, but is also about the best place to
 1184          * put this improvement.
 1185          */
 1186         if (entry->object.vm_object == NULL && !map->system_map) {
 1187                 vm_object_t object;
 1188                 object = vm_object_allocate(OBJT_DEFAULT,
 1189                                 atop(entry->end - entry->start));
 1190                 entry->object.vm_object = object;
 1191                 entry->offset = 0;
 1192         }
 1193 
 1194         /*
 1195          * Create a new entry and insert it AFTER the specified entry
 1196          */
 1197         new_entry = vm_map_entry_create(map);
 1198         *new_entry = *entry;
 1199 
 1200         new_entry->start = entry->end = end;
 1201         new_entry->offset += (end - entry->start);
 1202 
 1203         vm_map_entry_link(map, entry, new_entry);
 1204 
 1205         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 1206                 vm_object_reference(new_entry->object.vm_object);
 1207         }
 1208 }
 1209 
 1210 /*
 1211  *      VM_MAP_RANGE_CHECK:     [ internal use only ]
 1212  *
 1213  *      Asserts that the starting and ending region
 1214  *      addresses fall within the valid range of the map.
 1215  */
 1216 #define VM_MAP_RANGE_CHECK(map, start, end)             \
 1217                 {                                       \
 1218                 if (start < vm_map_min(map))            \
 1219                         start = vm_map_min(map);        \
 1220                 if (end > vm_map_max(map))              \
 1221                         end = vm_map_max(map);          \
 1222                 if (start > end)                        \
 1223                         start = end;                    \
 1224                 }
 1225 
 1226 /*
 1227  *      vm_map_submap:          [ kernel use only ]
 1228  *
 1229  *      Mark the given range as handled by a subordinate map.
 1230  *
 1231  *      This range must have been created with vm_map_find,
 1232  *      and no other operations may have been performed on this
 1233  *      range prior to calling vm_map_submap.
 1234  *
 1235  *      Only a limited number of operations can be performed
 1236  *      within this range after calling vm_map_submap:
 1237  *              vm_fault
 1238  *      [Don't try vm_map_copy!]
 1239  *
 1240  *      To remove a submapping, one must first remove the
 1241  *      range from the superior map, and then destroy the
 1242  *      submap (if desired).  [Better yet, don't try it.]
 1243  */
 1244 int
 1245 vm_map_submap(
 1246         vm_map_t map,
 1247         vm_offset_t start,
 1248         vm_offset_t end,
 1249         vm_map_t submap)
 1250 {
 1251         vm_map_entry_t entry;
 1252         int result = KERN_INVALID_ARGUMENT;
 1253 
 1254         vm_map_lock(map);
 1255 
 1256         VM_MAP_RANGE_CHECK(map, start, end);
 1257 
 1258         if (vm_map_lookup_entry(map, start, &entry)) {
 1259                 vm_map_clip_start(map, entry, start);
 1260         } else
 1261                 entry = entry->next;
 1262 
 1263         vm_map_clip_end(map, entry, end);
 1264 
 1265         if ((entry->start == start) && (entry->end == end) &&
 1266             ((entry->eflags & MAP_ENTRY_COW) == 0) &&
 1267             (entry->object.vm_object == NULL)) {
 1268                 entry->object.sub_map = submap;
 1269                 entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
 1270                 result = KERN_SUCCESS;
 1271         }
 1272         vm_map_unlock(map);
 1273 
 1274         return (result);
 1275 }
 1276 
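/*
 * Illustrative sketch, not part of the original file: the call sequence the
 * comment above describes, with hypothetical "start" and "size" values,
 * is roughly
 *
 *	(void) vm_map_find(map, NULL, 0, &start, size, FALSE,
 *	    VM_PROT_ALL, VM_PROT_ALL, 0);
 *	submap = vm_map_create(map->pmap, start, start + size);
 *	(void) vm_map_submap(map, start, start + size, submap);
 *
 * so that the reserved range in the parent map has no backing object and
 * can be flagged MAP_ENTRY_IS_SUB_MAP.
 */
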
 1277 /*
 1278  *      vm_map_protect:
 1279  *
 1280  *      Sets the protection of the specified address
 1281  *      region in the target map.  If "set_max" is
 1282  *      specified, the maximum protection is to be set;
 1283  *      otherwise, only the current protection is affected.
 1284  */
 1285 int
 1286 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
 1287                vm_prot_t new_prot, boolean_t set_max)
 1288 {
 1289         vm_map_entry_t current;
 1290         vm_map_entry_t entry;
 1291 
 1292         vm_map_lock(map);
 1293 
 1294         VM_MAP_RANGE_CHECK(map, start, end);
 1295 
 1296         if (vm_map_lookup_entry(map, start, &entry)) {
 1297                 vm_map_clip_start(map, entry, start);
 1298         } else {
 1299                 entry = entry->next;
 1300         }
 1301 
 1302         /*
 1303          * Make a first pass to check for protection violations.
 1304          */
 1305         current = entry;
 1306         while ((current != &map->header) && (current->start < end)) {
 1307                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
 1308                         vm_map_unlock(map);
 1309                         return (KERN_INVALID_ARGUMENT);
 1310                 }
 1311                 if ((new_prot & current->max_protection) != new_prot) {
 1312                         vm_map_unlock(map);
 1313                         return (KERN_PROTECTION_FAILURE);
 1314                 }
 1315                 current = current->next;
 1316         }
 1317 
 1318         /*
 1319          * Go back and fix up protections. [Note that clipping is not
 1320          * necessary the second time.]
 1321          */
 1322         current = entry;
 1323         while ((current != &map->header) && (current->start < end)) {
 1324                 vm_prot_t old_prot;
 1325 
 1326                 vm_map_clip_end(map, current, end);
 1327 
 1328                 old_prot = current->protection;
 1329                 if (set_max)
 1330                         current->protection =
 1331                             (current->max_protection = new_prot) &
 1332                             old_prot;
 1333                 else
 1334                         current->protection = new_prot;
 1335 
 1336                 /*
 1337                  * Update physical map if necessary. Worry about copy-on-write
 1338                  * here -- CHECK THIS XXX
 1339                  */
 1340                 if (current->protection != old_prot) {
 1341                         mtx_lock(&Giant);
 1342                         vm_page_lock_queues();
 1343 #define MASK(entry)     (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
 1344                                                         VM_PROT_ALL)
 1345                         pmap_protect(map->pmap, current->start,
 1346                             current->end,
 1347                             current->protection & MASK(current));
 1348 #undef  MASK
 1349                         vm_page_unlock_queues();
 1350                         mtx_unlock(&Giant);
 1351                 }
 1352                 vm_map_simplify_entry(map, current);
 1353                 current = current->next;
 1354         }
 1355         vm_map_unlock(map);
 1356         return (KERN_SUCCESS);
 1357 }
 1358 
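/*
 * Illustrative sketch, not part of the original file: an mprotect(2)-style
 * request to make a range read-only reduces, in this scheme, to
 *
 *	rv = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
 *
 * whereas passing set_max == TRUE sets max_protection to new_prot and
 * intersects it with the old current protection to form the new current
 * protection, as the set_max branch above shows.
 */
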
 1359 /*
 1360  *      vm_map_madvise:
 1361  *
 1362  *      This routine traverses a process's map handling the madvise
 1363  *      system call.  Advisories are classified as either those affecting
 1364  *      the vm_map_entry structure, or those affecting the underlying
 1365  *      objects.
 1366  */
 1367 int
 1368 vm_map_madvise(
 1369         vm_map_t map,
 1370         vm_offset_t start, 
 1371         vm_offset_t end,
 1372         int behav)
 1373 {
 1374         vm_map_entry_t current, entry;
 1375         int modify_map = 0;
 1376 
 1377         /*
 1378          * Some madvise calls directly modify the vm_map_entry, in which case
 1379          * we need to use an exclusive lock on the map and we need to perform 
 1380          * various clipping operations.  Otherwise we only need a read-lock
 1381          * on the map.
 1382          */
 1383         switch(behav) {
 1384         case MADV_NORMAL:
 1385         case MADV_SEQUENTIAL:
 1386         case MADV_RANDOM:
 1387         case MADV_NOSYNC:
 1388         case MADV_AUTOSYNC:
 1389         case MADV_NOCORE:
 1390         case MADV_CORE:
 1391                 modify_map = 1;
 1392                 vm_map_lock(map);
 1393                 break;
 1394         case MADV_WILLNEED:
 1395         case MADV_DONTNEED:
 1396         case MADV_FREE:
 1397                 vm_map_lock_read(map);
 1398                 break;
 1399         default:
 1400                 return (KERN_INVALID_ARGUMENT);
 1401         }
 1402 
 1403         /*
 1404          * Locate starting entry and clip if necessary.
 1405          */
 1406         VM_MAP_RANGE_CHECK(map, start, end);
 1407 
 1408         if (vm_map_lookup_entry(map, start, &entry)) {
 1409                 if (modify_map)
 1410                         vm_map_clip_start(map, entry, start);
 1411         } else {
 1412                 entry = entry->next;
 1413         }
 1414 
 1415         if (modify_map) {
 1416                 /*
 1417                  * madvise behaviors that are implemented in the vm_map_entry.
 1418                  *
 1419                  * We clip the vm_map_entry so that behavioral changes are
 1420                  * limited to the specified address range.
 1421                  */
 1422                 for (current = entry;
 1423                      (current != &map->header) && (current->start < end);
 1424                      current = current->next
 1425                 ) {
 1426                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
 1427                                 continue;
 1428 
 1429                         vm_map_clip_end(map, current, end);
 1430 
 1431                         switch (behav) {
 1432                         case MADV_NORMAL:
 1433                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
 1434                                 break;
 1435                         case MADV_SEQUENTIAL:
 1436                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
 1437                                 break;
 1438                         case MADV_RANDOM:
 1439                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
 1440                                 break;
 1441                         case MADV_NOSYNC:
 1442                                 current->eflags |= MAP_ENTRY_NOSYNC;
 1443                                 break;
 1444                         case MADV_AUTOSYNC:
 1445                                 current->eflags &= ~MAP_ENTRY_NOSYNC;
 1446                                 break;
 1447                         case MADV_NOCORE:
 1448                                 current->eflags |= MAP_ENTRY_NOCOREDUMP;
 1449                                 break;
 1450                         case MADV_CORE:
 1451                                 current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
 1452                                 break;
 1453                         default:
 1454                                 break;
 1455                         }
 1456                         vm_map_simplify_entry(map, current);
 1457                 }
 1458                 vm_map_unlock(map);
 1459         } else {
 1460                 vm_pindex_t pindex;
 1461                 int count;
 1462 
 1463                 /*
 1464                  * madvise behaviors that are implemented in the underlying
 1465                  * vm_object.
 1466                  *
 1467                  * Since we don't clip the vm_map_entry, we have to clip
 1468                  * the vm_object pindex and count.
 1469                  */
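                      /*
                       * For example (assuming 4KB pages): if an entry covers
                       * [0x10000, 0x20000) with offset 0 and the madvise range
                       * is [0x14000, 0x18000), then pindex is advanced by 4
                       * pages and count is reduced from 16 to 4, so only the
                       * four pages backing the requested range are affected.
                       */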
 1470                 for (current = entry;
 1471                      (current != &map->header) && (current->start < end);
 1472                      current = current->next
 1473                 ) {
 1474                         vm_offset_t useStart;
 1475 
 1476                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
 1477                                 continue;
 1478 
 1479                         pindex = OFF_TO_IDX(current->offset);
 1480                         count = atop(current->end - current->start);
 1481                         useStart = current->start;
 1482 
 1483                         if (current->start < start) {
 1484                                 pindex += atop(start - current->start);
 1485                                 count -= atop(start - current->start);
 1486                                 useStart = start;
 1487                         }
 1488                         if (current->end > end)
 1489                                 count -= atop(current->end - end);
 1490 
 1491                         if (count <= 0)
 1492                                 continue;
 1493 
 1494                         vm_object_madvise(current->object.vm_object,
 1495                                           pindex, count, behav);
 1496                         if (behav == MADV_WILLNEED) {
 1497                                 mtx_lock(&Giant);
 1498                                 pmap_object_init_pt(
 1499                                     map->pmap, 
 1500                                     useStart,
 1501                                     current->object.vm_object,
 1502                                     pindex, 
 1503                                     (count << PAGE_SHIFT),
 1504                                     MAP_PREFAULT_MADVISE
 1505                                 );
 1506                                 mtx_unlock(&Giant);
 1507                         }
 1508                 }
 1509                 vm_map_unlock_read(map);
 1510         }
 1511         return (0);
 1512 }       
 1513 
 1514 
 1515 /*
 1516  *      vm_map_inherit:
 1517  *
 1518  *      Sets the inheritance of the specified address
 1519  *      range in the target map.  Inheritance
 1520  *      affects how the map will be shared with
 1521  *      child maps at the time of vm_map_fork.
 1522  */
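      /*
       * Typically reached on behalf of the minherit(2) system call, roughly
       * along the lines of this sketch:
       *
       *      (void) vm_map_inherit(&p->p_vmspace->vm_map, addr,
       *          addr + size, VM_INHERIT_SHARE);
       *
       * As with the other attribute-setting routines in this file, the
       * affected entries are clipped to [start, end), the attribute is set
       * on each entry, and vm_map_simplify_entry() re-merges neighbors that
       * have become identical again.
       */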
 1523 int
 1524 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
 1525                vm_inherit_t new_inheritance)
 1526 {
 1527         vm_map_entry_t entry;
 1528         vm_map_entry_t temp_entry;
 1529 
 1530         switch (new_inheritance) {
 1531         case VM_INHERIT_NONE:
 1532         case VM_INHERIT_COPY:
 1533         case VM_INHERIT_SHARE:
 1534                 break;
 1535         default:
 1536                 return (KERN_INVALID_ARGUMENT);
 1537         }
 1538         vm_map_lock(map);
 1539         VM_MAP_RANGE_CHECK(map, start, end);
 1540         if (vm_map_lookup_entry(map, start, &temp_entry)) {
 1541                 entry = temp_entry;
 1542                 vm_map_clip_start(map, entry, start);
 1543         } else
 1544                 entry = temp_entry->next;
 1545         while ((entry != &map->header) && (entry->start < end)) {
 1546                 vm_map_clip_end(map, entry, end);
 1547                 entry->inheritance = new_inheritance;
 1548                 vm_map_simplify_entry(map, entry);
 1549                 entry = entry->next;
 1550         }
 1551         vm_map_unlock(map);
 1552         return (KERN_SUCCESS);
 1553 }
 1554 
 1555 /*
 1556  *      vm_map_unwire:
 1557  *
 1558  *      Implements both kernel and user unwiring.
 1559  */
 1560 int
 1561 vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
 1562         boolean_t user_unwire)
 1563 {
 1564         vm_map_entry_t entry, first_entry, tmp_entry;
 1565         vm_offset_t saved_start;
 1566         unsigned int last_timestamp;
 1567         int rv;
 1568         boolean_t need_wakeup, result;
 1569 
 1570         vm_map_lock(map);
 1571         VM_MAP_RANGE_CHECK(map, start, end);
 1572         if (!vm_map_lookup_entry(map, start, &first_entry)) {
 1573                 vm_map_unlock(map);
 1574                 return (KERN_INVALID_ADDRESS);
 1575         }
 1576         last_timestamp = map->timestamp;
 1577         entry = first_entry;
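              /*
               * Two passes are made over [start, end).  The first pass (the
               * loop below) clips the entries, marks them
               * MAP_ENTRY_IN_TRANSITION, and validates that the range is
               * fully mapped and wired; if the map lock must be dropped to
               * wait for another wiring operation, the entry is looked up
               * again afterwards.  The second pass (after the "done" label)
               * performs the actual unwiring and clears the in-transition
               * marks, whether or not the first pass succeeded.
               */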
 1578         while (entry != &map->header && entry->start < end) {
 1579                 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
 1580                         /*
 1581                          * We have not yet clipped the entry.
 1582                          */
 1583                         saved_start = (start >= entry->start) ? start :
 1584                             entry->start;
 1585                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 1586                         if (vm_map_unlock_and_wait(map, user_unwire)) {
 1587                                 /*
 1588                                  * Allow interruption of user unwiring?
 1589                                  */
 1590                         }
 1591                         vm_map_lock(map);
 1592                         if (last_timestamp+1 != map->timestamp) {
 1593                                 /*
 1594                                  * Look again for the entry because the map was
 1595                                  * modified while it was unlocked.
 1596                                  * Specifically, the entry may have been
 1597                                  * clipped, merged, or deleted.
 1598                                  */
 1599                                 if (!vm_map_lookup_entry(map, saved_start,
 1600                                     &tmp_entry)) {
 1601                                         if (saved_start == start) {
 1602                                                 /*
 1603                                                  * first_entry has been deleted.
 1604                                                  */
 1605                                                 vm_map_unlock(map);
 1606                                                 return (KERN_INVALID_ADDRESS);
 1607                                         }
 1608                                         end = saved_start;
 1609                                         rv = KERN_INVALID_ADDRESS;
 1610                                         goto done;
 1611                                 }
 1612                                 if (entry == first_entry)
 1613                                         first_entry = tmp_entry;
 1614                                 else
 1615                                         first_entry = NULL;
 1616                                 entry = tmp_entry;
 1617                         }
 1618                         last_timestamp = map->timestamp;
 1619                         continue;
 1620                 }
 1621                 vm_map_clip_start(map, entry, start);
 1622                 vm_map_clip_end(map, entry, end);
 1623                 /*
 1624                  * Mark the entry in case the map lock is released.  (See
 1625                  * above.)
 1626                  */
 1627                 entry->eflags |= MAP_ENTRY_IN_TRANSITION;
 1628                 /*
 1629                  * Check the map for holes in the specified region.
 1630                  */
 1631                 if (entry->end < end && (entry->next == &map->header ||
 1632                     entry->next->start > entry->end)) {
 1633                         end = entry->end;
 1634                         rv = KERN_INVALID_ADDRESS;
 1635                         goto done;
 1636                 }
 1637                 /*
 1638                  * Require that the entry is wired.
 1639                  */
 1640                 if (entry->wired_count == 0 || (user_unwire &&
 1641                     (entry->eflags & MAP_ENTRY_USER_WIRED) == 0)) {
 1642                         end = entry->end;
 1643                         rv = KERN_INVALID_ARGUMENT;
 1644                         goto done;
 1645                 }
 1646                 entry = entry->next;
 1647         }
 1648         rv = KERN_SUCCESS;
 1649 done:
 1650         need_wakeup = FALSE;
 1651         if (first_entry == NULL) {
 1652                 result = vm_map_lookup_entry(map, start, &first_entry);
 1653                 KASSERT(result, ("vm_map_unwire: lookup failed"));
 1654         }
 1655         entry = first_entry;
 1656         while (entry != &map->header && entry->start < end) {
 1657                 if (rv == KERN_SUCCESS) {
 1658                         if (user_unwire)
 1659                                 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 1660                         entry->wired_count--;
 1661                         if (entry->wired_count == 0) {
 1662                                 /*
 1663                                  * Retain the map lock.
 1664                                  */
 1665                                 vm_fault_unwire(map, entry->start, entry->end);
 1666                         }
 1667                 }
 1668                 KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
 1669                         ("vm_map_unwire: in-transition flag missing"));
 1670                 entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
 1671                 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
 1672                         entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
 1673                         need_wakeup = TRUE;
 1674                 }
 1675                 vm_map_simplify_entry(map, entry);
 1676                 entry = entry->next;
 1677         }
 1678         vm_map_unlock(map);
 1679         if (need_wakeup)
 1680                 vm_map_wakeup(map);
 1681         return (rv);
 1682 }
 1683 
 1684 /*
 1685  *      vm_map_wire:
 1686  *
 1687  *      Implements both kernel and user wiring.
 1688  */
 1689 int
 1690 vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
 1691         boolean_t user_wire)
 1692 {
 1693         vm_map_entry_t entry, first_entry, tmp_entry;
 1694         vm_offset_t saved_end, saved_start;
 1695         unsigned int last_timestamp;
 1696         int rv;
 1697         boolean_t need_wakeup, result;
 1698 
 1699         vm_map_lock(map);
 1700         VM_MAP_RANGE_CHECK(map, start, end);
 1701         if (!vm_map_lookup_entry(map, start, &first_entry)) {
 1702                 vm_map_unlock(map);
 1703                 return (KERN_INVALID_ADDRESS);
 1704         }
 1705         last_timestamp = map->timestamp;
 1706         entry = first_entry;
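              /*
               * As in vm_map_unwire() above, the work is done in two passes.
               * The first pass clips and marks the entries in transition and
               * calls vm_fault_wire() with the map unlocked; a wired_count
               * of -1 serves as a sentinel for entries whose wiring attempt
               * failed.  The second pass (after "done") either commits the
               * wiring (setting MAP_ENTRY_USER_WIRED for user requests) or
               * rolls it back, and clears the in-transition marks.
               */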
 1707         while (entry != &map->header && entry->start < end) {
 1708                 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
 1709                         /*
 1710                          * We have not yet clipped the entry.
 1711                          */
 1712                         saved_start = (start >= entry->start) ? start :
 1713                             entry->start;
 1714                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 1715                         if (vm_map_unlock_and_wait(map, user_wire)) {
 1716                                 /*
 1717                                  * Allow interruption of user wiring?
 1718                                  */
 1719                         }
 1720                         vm_map_lock(map);
 1721                         if (last_timestamp + 1 != map->timestamp) {
 1722                                 /*
 1723                                  * Look again for the entry because the map was
 1724                                  * modified while it was unlocked.
 1725                                  * Specifically, the entry may have been
 1726                                  * clipped, merged, or deleted.
 1727                                  */
 1728                                 if (!vm_map_lookup_entry(map, saved_start,
 1729                                     &tmp_entry)) {
 1730                                         if (saved_start == start) {
 1731                                                 /*
 1732                                                  * first_entry has been deleted.
 1733                                                  */
 1734                                                 vm_map_unlock(map);
 1735                                                 return (KERN_INVALID_ADDRESS);
 1736                                         }
 1737                                         end = saved_start;
 1738                                         rv = KERN_INVALID_ADDRESS;
 1739                                         goto done;
 1740                                 }
 1741                                 if (entry == first_entry)
 1742                                         first_entry = tmp_entry;
 1743                                 else
 1744                                         first_entry = NULL;
 1745                                 entry = tmp_entry;
 1746                         }
 1747                         last_timestamp = map->timestamp;
 1748                         continue;
 1749                 }
 1750                 vm_map_clip_start(map, entry, start);
 1751                 vm_map_clip_end(map, entry, end);
 1752                 /*
 1753                  * Mark the entry in case the map lock is released.  (See
 1754                  * above.)
 1755                  */
 1756                 entry->eflags |= MAP_ENTRY_IN_TRANSITION;
 1757                 /*
 1758                  * Wire the entry if it is not already wired.
 1759                  */
 1760                 if (entry->wired_count == 0) {
 1761                         entry->wired_count++;
 1762                         saved_start = entry->start;
 1763                         saved_end = entry->end;
 1764                         /*
 1765                          * Release the map lock, relying on the in-transition
 1766                          * mark.
 1767                          */
 1768                         vm_map_unlock(map);
 1769                         rv = vm_fault_wire(map, saved_start, saved_end,
 1770                             user_wire);
 1771                         vm_map_lock(map);
 1772                         if (last_timestamp + 1 != map->timestamp) {
 1773                                 /*
 1774                                  * Look again for the entry because the map was
 1775                                  * modified while it was unlocked.  The entry
 1776                                  * may have been clipped, but NOT merged or
 1777                                  * deleted.
 1778                                  */
 1779                                 result = vm_map_lookup_entry(map, saved_start,
 1780                                     &tmp_entry);
 1781                                 KASSERT(result, ("vm_map_wire: lookup failed"));
 1782                                 if (entry == first_entry)
 1783                                         first_entry = tmp_entry;
 1784                                 else
 1785                                         first_entry = NULL;
 1786                                 entry = tmp_entry;
 1787                                 while (entry->end < saved_end) {
 1788                                         if (rv != KERN_SUCCESS) {
 1789                                                 KASSERT(entry->wired_count == 1,
 1790                                                     ("vm_map_wire: bad count"));
 1791                                                 entry->wired_count = -1;
 1792                                         }
 1793                                         entry = entry->next;
 1794                                 }
 1795                         }
 1796                         last_timestamp = map->timestamp;
 1797                         if (rv != KERN_SUCCESS) {
 1798                                 KASSERT(entry->wired_count == 1,
 1799                                     ("vm_map_wire: bad count"));
 1800                                 /*
 1801                                  * Assign an out-of-range value to represent
 1802                                  * the failure to wire this entry.
 1803                                  */
 1804                                 entry->wired_count = -1;
 1805                                 end = entry->end;
 1806                                 goto done;
 1807                         }
 1808                 } else if (!user_wire ||
 1809                            (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
 1810                         entry->wired_count++;
 1811                 }
 1812                 /*
 1813                  * Check the map for holes in the specified region.
 1814                  */
 1815                 if (entry->end < end && (entry->next == &map->header ||
 1816                     entry->next->start > entry->end)) {
 1817                         end = entry->end;
 1818                         rv = KERN_INVALID_ADDRESS;
 1819                         goto done;
 1820                 }
 1821                 entry = entry->next;
 1822         }
 1823         rv = KERN_SUCCESS;
 1824 done:
 1825         need_wakeup = FALSE;
 1826         if (first_entry == NULL) {
 1827                 result = vm_map_lookup_entry(map, start, &first_entry);
 1828                 KASSERT(result, ("vm_map_wire: lookup failed"));
 1829         }
 1830         entry = first_entry;
 1831         while (entry != &map->header && entry->start < end) {
 1832                 if (rv == KERN_SUCCESS) {
 1833                         if (user_wire)
 1834                                 entry->eflags |= MAP_ENTRY_USER_WIRED;
 1835                 } else if (entry->wired_count == -1) {
 1836                         /*
 1837                          * Wiring failed on this entry.  Thus, unwiring is
 1838                          * unnecessary.
 1839                          */
 1840                         entry->wired_count = 0;
 1841                 } else {
 1842                         if (!user_wire ||
 1843                             (entry->eflags & MAP_ENTRY_USER_WIRED) == 0)
 1844                                 entry->wired_count--;
 1845                         if (entry->wired_count == 0) {
 1846                                 /*
 1847                                  * Retain the map lock.
 1848                                  */
 1849                                 vm_fault_unwire(map, entry->start, entry->end);
 1850                         }
 1851                 }
 1852                 KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION,
 1853                         ("vm_map_wire: in-transition flag missing"));
 1854                 entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
 1855                 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
 1856                         entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
 1857                         need_wakeup = TRUE;
 1858                 }
 1859                 vm_map_simplify_entry(map, entry);
 1860                 entry = entry->next;
 1861         }
 1862         vm_map_unlock(map);
 1863         if (need_wakeup)
 1864                 vm_map_wakeup(map);
 1865         return (rv);
 1866 }
 1867 
 1868 /*
 1869  * vm_map_clean
 1870  *
 1871  * Push any dirty cached pages in the address range to their pager.
 1872  * If syncio is TRUE, dirty pages are written synchronously.
 1873  * If invalidate is TRUE, any cached pages are freed as well.
 1874  *
 1875  * Returns an error if any part of the specified range is not mapped.
 1876  */
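      /*
       * Two passes are made under a read lock: the first simply checks the
       * range for unmapped holes, the second walks each entry's shadow chain
       * down to any backing vnode object and cleans (and optionally
       * invalidates) its pages.  This is, roughly, the engine behind the
       * msync(2) system call.
       */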
 1877 int
 1878 vm_map_clean(
 1879         vm_map_t map,
 1880         vm_offset_t start,
 1881         vm_offset_t end,
 1882         boolean_t syncio,
 1883         boolean_t invalidate)
 1884 {
 1885         vm_map_entry_t current;
 1886         vm_map_entry_t entry;
 1887         vm_size_t size;
 1888         vm_object_t object;
 1889         vm_ooffset_t offset;
 1890 
 1891         GIANT_REQUIRED;
 1892 
 1893         vm_map_lock_read(map);
 1894         VM_MAP_RANGE_CHECK(map, start, end);
 1895         if (!vm_map_lookup_entry(map, start, &entry)) {
 1896                 vm_map_unlock_read(map);
 1897                 return (KERN_INVALID_ADDRESS);
 1898         }
 1899         /*
 1900          * Make a first pass to check for holes.
 1901          */
 1902         for (current = entry; current->start < end; current = current->next) {
 1903                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
 1904                         vm_map_unlock_read(map);
 1905                         return (KERN_INVALID_ARGUMENT);
 1906                 }
 1907                 if (end > current->end &&
 1908                     (current->next == &map->header ||
 1909                         current->end != current->next->start)) {
 1910                         vm_map_unlock_read(map);
 1911                         return (KERN_INVALID_ADDRESS);
 1912                 }
 1913         }
 1914 
 1915         if (invalidate) {
 1916                 vm_page_lock_queues();
 1917                 pmap_remove(map->pmap, start, end);
 1918                 vm_page_unlock_queues();
 1919         }
 1920         /*
 1921          * Make a second pass, cleaning/uncaching pages from the indicated
 1922          * objects as we go.
 1923          */
 1924         for (current = entry; current->start < end; current = current->next) {
 1925                 offset = current->offset + (start - current->start);
 1926                 size = (end <= current->end ? end : current->end) - start;
 1927                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
 1928                         vm_map_t smap;
 1929                         vm_map_entry_t tentry;
 1930                         vm_size_t tsize;
 1931 
 1932                         smap = current->object.sub_map;
 1933                         vm_map_lock_read(smap);
 1934                         (void) vm_map_lookup_entry(smap, offset, &tentry);
 1935                         tsize = tentry->end - offset;
 1936                         if (tsize < size)
 1937                                 size = tsize;
 1938                         object = tentry->object.vm_object;
 1939                         offset = tentry->offset + (offset - tentry->start);
 1940                         vm_map_unlock_read(smap);
 1941                 } else {
 1942                         object = current->object.vm_object;
 1943                 }
 1944                 /*
 1945                  * Note that there is absolutely no sense in writing out
 1946                  * anonymous objects, so we track down the vnode object
 1947                  * to write out.
 1948                  * We invalidate (remove) all pages from the address space
 1949                  * anyway, for semantic correctness.
 1950                  *
 1951                  * note: certain anonymous maps, such as MAP_NOSYNC maps,
 1952                  * may start out with a NULL object.
 1953                  */
 1954                 while (object && object->backing_object) {
 1955                         object = object->backing_object;
 1956                         offset += object->backing_object_offset;
 1957                         if (object->size < OFF_TO_IDX(offset + size))
 1958                                 size = IDX_TO_OFF(object->size) - offset;
 1959                 }
 1960                 if (object && (object->type == OBJT_VNODE) && 
 1961                     (current->protection & VM_PROT_WRITE)) {
 1962                         /*
 1963                          * Flush pages if writing is allowed, invalidate them
 1964                          * if invalidation requested.  Pages undergoing I/O
 1965                          * will be ignored by vm_object_page_remove().
 1966                          *
 1967                          * We cannot lock the vnode and then wait for paging
 1968                          * to complete without deadlocking against vm_fault.
 1969                          * Instead we simply call vm_object_page_remove() and
 1970                          * allow it to block internally on a page-by-page 
 1971                          * basis when it encounters pages undergoing async 
 1972                          * I/O.
 1973                          */
 1974                         int flags;
 1975 
 1976                         vm_object_reference(object);
 1977                         vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curthread);
 1978                         flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
 1979                         flags |= invalidate ? OBJPC_INVAL : 0;
 1980                         VM_OBJECT_LOCK(object);
 1981                         vm_object_page_clean(object,
 1982                             OFF_TO_IDX(offset),
 1983                             OFF_TO_IDX(offset + size + PAGE_MASK),
 1984                             flags);
 1985                         VM_OBJECT_UNLOCK(object);
 1986                         VOP_UNLOCK(object->handle, 0, curthread);
 1987                         vm_object_deallocate(object);
 1988                 }
 1989                 if (object && invalidate &&
 1990                     ((object->type == OBJT_VNODE) ||
 1991                      (object->type == OBJT_DEVICE))) {
 1992                         VM_OBJECT_LOCK(object);
 1993                         vm_object_page_remove(object,
 1994                             OFF_TO_IDX(offset),
 1995                             OFF_TO_IDX(offset + size + PAGE_MASK),
 1996                             FALSE);
 1997                         VM_OBJECT_UNLOCK(object);
 1998                 }
 1999                 start += size;
 2000         }
 2001 
 2002         vm_map_unlock_read(map);
 2003         return (KERN_SUCCESS);
 2004 }
 2005 
 2006 /*
 2007  *      vm_map_entry_unwire:    [ internal use only ]
 2008  *
 2009  *      Make the region specified by this entry pageable.
 2010  *
 2011  *      The map in question should be locked.
 2012  *      [This is the reason for this routine's existence.]
 2013  */
 2014 static void 
 2015 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
 2016 {
 2017         vm_fault_unwire(map, entry->start, entry->end);
 2018         entry->wired_count = 0;
 2019 }
 2020 
 2021 /*
 2022  *      vm_map_entry_delete:    [ internal use only ]
 2023  *
 2024  *      Deallocate the given entry from the target map.
 2025  */
 2026 static void
 2027 vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
 2028 {
 2029         vm_map_entry_unlink(map, entry);
 2030         map->size -= entry->end - entry->start;
 2031 
 2032         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 2033                 vm_object_deallocate(entry->object.vm_object);
 2034         }
 2035 
 2036         vm_map_entry_dispose(map, entry);
 2037 }
 2038 
 2039 /*
 2040  *      vm_map_delete:  [ internal use only ]
 2041  *
 2042  *      Deallocates the given address range from the target
 2043  *      map.
 2044  */
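      /*
       * The caller must hold the map lock; vm_map_remove() below is the
       * exported wrapper that acquires it.  The loop also waits for any
       * in-transition (wiring or unwiring) entries before tearing them down.
       */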
 2045 int
 2046 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
 2047 {
 2048         vm_object_t object;
 2049         vm_map_entry_t entry;
 2050         vm_map_entry_t first_entry;
 2051 
 2052         /*
 2053          * Find the start of the region, and clip it
 2054          */
 2055         if (!vm_map_lookup_entry(map, start, &first_entry))
 2056                 entry = first_entry->next;
 2057         else {
 2058                 entry = first_entry;
 2059                 vm_map_clip_start(map, entry, start);
 2060         }
 2061 
 2062         /*
 2063          * Save the free space hint
 2064          */
 2065         if (entry == &map->header) {
 2066                 map->first_free = &map->header;
 2067         } else if (map->first_free->start >= start) {
 2068                 map->first_free = entry->prev;
 2069         }
 2070 
 2071         /*
 2072          * Step through all entries in this region
 2073          */
 2074         while ((entry != &map->header) && (entry->start < end)) {
 2075                 vm_map_entry_t next;
 2076                 vm_offset_t s, e;
 2077                 vm_pindex_t offidxstart, offidxend, count;
 2078 
 2079                 /*
 2080                  * Wait for wiring or unwiring of an entry to complete.
 2081                  */
 2082                 if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0) {
 2083                         unsigned int last_timestamp;
 2084                         vm_offset_t saved_start;
 2085                         vm_map_entry_t tmp_entry;
 2086 
 2087                         saved_start = entry->start;
 2088                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 2089                         last_timestamp = map->timestamp;
 2090                         (void) vm_map_unlock_and_wait(map, FALSE);
 2091                         vm_map_lock(map);
 2092                         if (last_timestamp + 1 != map->timestamp) {
 2093                                 /*
 2094                                  * Look again for the entry because the map was
 2095                                  * modified while it was unlocked.
 2096                                  * Specifically, the entry may have been
 2097                                  * clipped, merged, or deleted.
 2098                                  */
 2099                                 if (!vm_map_lookup_entry(map, saved_start,
 2100                                                          &tmp_entry))
 2101                                         entry = tmp_entry->next;
 2102                                 else {
 2103                                         entry = tmp_entry;
 2104                                         vm_map_clip_start(map, entry,
 2105                                                           saved_start);
 2106                                 }
 2107                         }
 2108                         continue;
 2109                 }
 2110                 vm_map_clip_end(map, entry, end);
 2111 
 2112                 s = entry->start;
 2113                 e = entry->end;
 2114                 next = entry->next;
 2115 
 2116                 offidxstart = OFF_TO_IDX(entry->offset);
 2117                 count = OFF_TO_IDX(e - s);
 2118                 object = entry->object.vm_object;
 2119 
 2120                 /*
 2121                  * Unwire before removing addresses from the pmap; otherwise,
 2122                  * unwiring will put the entries back in the pmap.
 2123                  */
 2124                 if (entry->wired_count != 0) {
 2125                         vm_map_entry_unwire(map, entry);
 2126                 }
 2127 
 2128                 offidxend = offidxstart + count;
 2129 
 2130                 if (object == kernel_object || object == kmem_object) {
 2131                         if (object == kernel_object)
 2132                                 GIANT_REQUIRED;
 2133                         VM_OBJECT_LOCK(object);
 2134                         vm_object_page_remove(object, offidxstart, offidxend, FALSE);
 2135                         VM_OBJECT_UNLOCK(object);
 2136                 } else {
 2137                         mtx_lock(&Giant);
 2138                         vm_page_lock_queues();
 2139                         pmap_remove(map->pmap, s, e);
 2140                         vm_page_unlock_queues();
 2141                         if (object != NULL) {
 2142                                 VM_OBJECT_LOCK(object);
 2143                                 if (object->ref_count != 1 &&
 2144                                     (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
 2145                                     (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
 2146                                         VM_OBJECT_UNLOCK(object);
 2147                                         vm_object_collapse(object);
 2148                                         VM_OBJECT_LOCK(object);
 2149                                         vm_object_page_remove(object, offidxstart, offidxend, FALSE);
 2150                                         if (object->type == OBJT_SWAP)
 2151                                                 swap_pager_freespace(object, offidxstart, count);
 2152                                         if (offidxend >= object->size &&
 2153                                             offidxstart < object->size)
 2154                                                 object->size = offidxstart;
 2155                                 }
 2156                                 VM_OBJECT_UNLOCK(object);
 2157                         }
 2158                         mtx_unlock(&Giant);
 2159                 }
 2160 
 2161                 /*
 2162                  * Delete the entry (which may delete the object) only after
 2163                  * removing all pmap entries pointing to its pages.
 2164                  * (Otherwise, its page frames may be reallocated, and any
 2165                  * modify bits will be set in the wrong object!)
 2166                  */
 2167                 vm_map_entry_delete(map, entry);
 2168                 entry = next;
 2169         }
 2170         return (KERN_SUCCESS);
 2171 }
 2172 
 2173 /*
 2174  *      vm_map_remove:
 2175  *
 2176  *      Remove the given address range from the target map.
 2177  *      This is the exported form of vm_map_delete.
 2178  */
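      /*
       * For kmem_map the old spl protection is retained (splvm below),
       * presumably because kmem_map may be manipulated from contexts where
       * the usual sleep locks cannot be used.
       */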
 2179 int
 2180 vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
 2181 {
 2182         int result, s = 0;
 2183 
 2184         if (map == kmem_map)
 2185                 s = splvm();
 2186 
 2187         vm_map_lock(map);
 2188         VM_MAP_RANGE_CHECK(map, start, end);
 2189         result = vm_map_delete(map, start, end);
 2190         vm_map_unlock(map);
 2191 
 2192         if (map == kmem_map)
 2193                 splx(s);
 2194 
 2195         return (result);
 2196 }
 2197 
 2198 /*
 2199  *      vm_map_check_protection:
 2200  *
 2201  *      Assert that the target map allows the specified privilege on the
 2202  *      entire address region given.  The entire region must be allocated.
 2203  *
 2204  *      WARNING!  This code does not and should not check whether the
 2205  *      contents of the region are accessible.  For example, a smaller file
 2206  *      might be mapped into a larger address space.
 2207  *
 2208  *      NOTE!  This code is also called by munmap().
 2209  */
 2210 boolean_t
 2211 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
 2212                         vm_prot_t protection)
 2213 {
 2214         vm_map_entry_t entry;
 2215         vm_map_entry_t tmp_entry;
 2216 
 2217         vm_map_lock_read(map);
 2218         if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
 2219                 vm_map_unlock_read(map);
 2220                 return (FALSE);
 2221         }
 2222         entry = tmp_entry;
 2223 
 2224         while (start < end) {
 2225                 if (entry == &map->header) {
 2226                         vm_map_unlock_read(map);
 2227                         return (FALSE);
 2228                 }
 2229                 /*
 2230                  * No holes allowed!
 2231                  */
 2232                 if (start < entry->start) {
 2233                         vm_map_unlock_read(map);
 2234                         return (FALSE);
 2235                 }
 2236                 /*
 2237                  * Check protection associated with entry.
 2238                  */
 2239                 if ((entry->protection & protection) != protection) {
 2240                         vm_map_unlock_read(map);
 2241                         return (FALSE);
 2242                 }
 2243                 /* go to next entry */
 2244                 start = entry->end;
 2245                 entry = entry->next;
 2246         }
 2247         vm_map_unlock_read(map);
 2248         return (TRUE);
 2249 }
 2250 
 2251 /*
 2252  *      vm_map_copy_entry:
 2253  *
 2254  *      Copies the contents of the source entry to the destination
 2255  *      entry.  The entries *must* be aligned properly.
 2256  */
 2257 static void
 2258 vm_map_copy_entry(
 2259         vm_map_t src_map,
 2260         vm_map_t dst_map,
 2261         vm_map_entry_t src_entry, 
 2262         vm_map_entry_t dst_entry)
 2263 {
 2264         vm_object_t src_object;
 2265 
 2266         if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
 2267                 return;
 2268 
 2269         if (src_entry->wired_count == 0) {
 2270 
 2271                 /*
 2272                  * If the source entry is marked needs_copy, it is already
 2273                  * write-protected.
 2274                  */
 2275                 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
 2276                         vm_page_lock_queues();
 2277                         pmap_protect(src_map->pmap,
 2278                             src_entry->start,
 2279                             src_entry->end,
 2280                             src_entry->protection & ~VM_PROT_WRITE);
 2281                         vm_page_unlock_queues();
 2282                 }
 2283 
 2284                 /*
 2285                  * Make a copy of the object.
 2286                  */
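                      /*
                       * "Copy" here means copy-on-write: both entries end up
                       * referencing the same object and both are marked
                       * MAP_ENTRY_NEEDS_COPY, so the real copy is deferred
                       * until one side takes a write fault.
                       */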
 2287                 if ((src_object = src_entry->object.vm_object) != NULL) {
 2288 
 2289                         if ((src_object->handle == NULL) &&
 2290                                 (src_object->type == OBJT_DEFAULT ||
 2291                                  src_object->type == OBJT_SWAP)) {
 2292                                 vm_object_collapse(src_object);
 2293                                 if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
 2294                                         vm_object_split(src_entry);
 2295                                         src_object = src_entry->object.vm_object;
 2296                                 }
 2297                         }
 2298 
 2299                         vm_object_reference(src_object);
 2300                         VM_OBJECT_LOCK(src_object);
 2301                         vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
 2302                         VM_OBJECT_UNLOCK(src_object);
 2303                         dst_entry->object.vm_object = src_object;
 2304                         src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
 2305                         dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
 2306                         dst_entry->offset = src_entry->offset;
 2307                 } else {
 2308                         dst_entry->object.vm_object = NULL;
 2309                         dst_entry->offset = 0;
 2310                 }
 2311 
 2312                 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
 2313                     dst_entry->end - dst_entry->start, src_entry->start);
 2314         } else {
 2315                 /*
 2316                  * Of course, wired-down pages can't be set copy-on-write.
 2317                  * Cause the wired pages to be copied into the new map by
 2318                  * simulating faults (the new pages are pageable).
 2319                  */
 2320                 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
 2321         }
 2322 }
 2323 
 2324 /*
 2325  * vmspace_fork:
 2326  * Create a new process vmspace structure and vm_map
 2327  * based on those of an existing process.  The new map
 2328  * is based on the old map, according to the inheritance
 2329  * values on the regions in that map.
 2330  *
 2331  * The source map must not be locked.
 2332  */
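      /*
       * Each entry of the parent map is handled according to its
       * inheritance: VM_INHERIT_NONE entries are simply skipped,
       * VM_INHERIT_SHARE entries reference the same object from both maps
       * (creating the object, or a shadow of it, first if necessary), and
       * VM_INHERIT_COPY entries are set up copy-on-write via
       * vm_map_copy_entry().
       */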
 2333 struct vmspace *
 2334 vmspace_fork(struct vmspace *vm1)
 2335 {
 2336         struct vmspace *vm2;
 2337         vm_map_t old_map = &vm1->vm_map;
 2338         vm_map_t new_map;
 2339         vm_map_entry_t old_entry;
 2340         vm_map_entry_t new_entry;
 2341         vm_object_t object;
 2342 
 2343         GIANT_REQUIRED;
 2344 
 2345         vm_map_lock(old_map);
 2346         old_map->infork = 1;
 2347 
 2348         vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
 2349         bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
 2350             (caddr_t) &vm1->vm_endcopy - (caddr_t) &vm1->vm_startcopy);
 2351         new_map = &vm2->vm_map; /* XXX */
 2352         new_map->timestamp = 1;
 2353 
 2354         old_entry = old_map->header.next;
 2355 
 2356         while (old_entry != &old_map->header) {
 2357                 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 2358                         panic("vm_map_fork: encountered a submap");
 2359 
 2360                 switch (old_entry->inheritance) {
 2361                 case VM_INHERIT_NONE:
 2362                         break;
 2363 
 2364                 case VM_INHERIT_SHARE:
 2365                         /*
 2366                          * Clone the entry, creating the shared object if necessary.
 2367                          */
 2368                         object = old_entry->object.vm_object;
 2369                         if (object == NULL) {
 2370                                 object = vm_object_allocate(OBJT_DEFAULT,
 2371                                         atop(old_entry->end - old_entry->start));
 2372                                 old_entry->object.vm_object = object;
 2373                                 old_entry->offset = (vm_offset_t) 0;
 2374                         }
 2375 
 2376                         /*
 2377                          * Add the reference before calling vm_object_shadow
 2378                          * to ensure that a shadow object is created.
 2379                          */
 2380                         vm_object_reference(object);
 2381                         if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
 2382                                 vm_object_shadow(&old_entry->object.vm_object,
 2383                                         &old_entry->offset,
 2384                                         atop(old_entry->end - old_entry->start));
 2385                                 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 2386                                 /* Transfer the second reference too. */
 2387                                 vm_object_reference(
 2388                                     old_entry->object.vm_object);
 2389                                 vm_object_deallocate(object);
 2390                                 object = old_entry->object.vm_object;
 2391                         }
 2392                         VM_OBJECT_LOCK(object);
 2393                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
 2394                         VM_OBJECT_UNLOCK(object);
 2395 
 2396                         /*
 2397                          * Clone the entry, referencing the shared object.
 2398                          */
 2399                         new_entry = vm_map_entry_create(new_map);
 2400                         *new_entry = *old_entry;
 2401                         new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 2402                         new_entry->wired_count = 0;
 2403 
 2404                         /*
 2405                          * Insert the entry into the new map -- we know we're
 2406                          * inserting at the end of the new map.
 2407                          */
 2408                         vm_map_entry_link(new_map, new_map->header.prev,
 2409                             new_entry);
 2410 
 2411                         /*
 2412                          * Update the physical map
 2413                          */
 2414                         pmap_copy(new_map->pmap, old_map->pmap,
 2415                             new_entry->start,
 2416                             (old_entry->end - old_entry->start),
 2417                             old_entry->start);
 2418                         break;
 2419 
 2420                 case VM_INHERIT_COPY:
 2421                         /*
 2422                          * Clone the entry and link into the map.
 2423                          */
 2424                         new_entry = vm_map_entry_create(new_map);
 2425                         *new_entry = *old_entry;
 2426                         new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 2427                         new_entry->wired_count = 0;
 2428                         new_entry->object.vm_object = NULL;
 2429                         vm_map_entry_link(new_map, new_map->header.prev,
 2430                             new_entry);
 2431                         vm_map_copy_entry(old_map, new_map, old_entry,
 2432                             new_entry);
 2433                         break;
 2434                 }
 2435                 old_entry = old_entry->next;
 2436         }
 2437 
 2438         new_map->size = old_map->size;
 2439         old_map->infork = 0;
 2440         vm_map_unlock(old_map);
 2441 
 2442         return (vm2);
 2443 }
 2444 
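      /*
       *      vm_map_stack:
       *
       *      Reserve [addrbos, addrbos + max_ssize) for a grow-down stack.
       *      Only the top init_ssize bytes (at most sgrowsiz) are mapped
       *      initially; the remainder is recorded in avail_ssize so that
       *      vm_map_growstack() can extend the mapping downward on demand.
       */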
 2445 int
 2446 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
 2447               vm_prot_t prot, vm_prot_t max, int cow)
 2448 {
 2449         vm_map_entry_t prev_entry;
 2450         vm_map_entry_t new_stack_entry;
 2451         vm_size_t      init_ssize;
 2452         int            rv;
 2453 
 2454         if (addrbos < vm_map_min(map))
 2455                 return (KERN_NO_SPACE);
 2456 
 2457         if (max_ssize < sgrowsiz)
 2458                 init_ssize = max_ssize;
 2459         else
 2460                 init_ssize = sgrowsiz;
 2461 
 2462         vm_map_lock(map);
 2463 
 2464         /* If addr is already mapped, no go */
 2465         if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
 2466                 vm_map_unlock(map);
 2467                 return (KERN_NO_SPACE);
 2468         }
 2469 
 2470         /* If we would blow our VMEM resource limit, no go */
 2471         if (map->size + init_ssize >
 2472             curthread->td_proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
 2473                 vm_map_unlock(map);
 2474                 return (KERN_NO_SPACE);
 2475         }
 2476 
 2477         /* If we can't accommodate max_ssize in the current mapping,
 2478          * no go.  However, we need to be aware that subsequent user
 2479          * mappings might map into the space we have reserved for the
 2480          * stack, and currently this space is not protected.
 2481          *
 2482          * Hopefully we will at least detect this condition
 2483          * when we try to grow the stack.
 2484          */
 2485         if ((prev_entry->next != &map->header) &&
 2486             (prev_entry->next->start < addrbos + max_ssize)) {
 2487                 vm_map_unlock(map);
 2488                 return (KERN_NO_SPACE);
 2489         }
 2490 
 2491         /* We initially map a stack of only init_ssize.  We will
 2492          * grow it as needed later.  Since this is to be a
 2493          * grow-down stack, we map at the top of the range.
 2494          *
 2495          * Note: we would normally expect prot and max to be
 2496          * VM_PROT_ALL, and cow to be 0.  Possibly we should
 2497          * eliminate these as input parameters, and just
 2498          * pass these values here in the insert call.
 2499          */
 2500         rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize,
 2501                            addrbos + max_ssize, prot, max, cow);
 2502 
 2503         /* Now set the avail_ssize amount */
 2504         if (rv == KERN_SUCCESS){
 2505                 if (prev_entry != &map->header)
 2506                         vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize);
 2507                 new_stack_entry = prev_entry->next;
 2508                 if (new_stack_entry->end   != addrbos + max_ssize ||
 2509                     new_stack_entry->start != addrbos + max_ssize - init_ssize)
 2510                         panic ("Bad entry start/end for new stack entry");
 2511                 else 
 2512                         new_stack_entry->avail_ssize = max_ssize - init_ssize;
 2513         }
 2514 
 2515         vm_map_unlock(map);
 2516         return (rv);
 2517 }
 2518 
 2519 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
 2520  * desired address is already mapped, or if we successfully grow
 2521  * the stack.  Also returns KERN_SUCCESS if addr is outside the
 2522  * stack range (this is strange, but preserves compatibility with
 2523  * the grow function in vm_machdep.c).
 2524  */
 2525 int
 2526 vm_map_growstack (struct proc *p, vm_offset_t addr)
 2527 {
 2528         vm_map_entry_t prev_entry;
 2529         vm_map_entry_t stack_entry;
 2530         vm_map_entry_t new_stack_entry;
 2531         struct vmspace *vm = p->p_vmspace;
 2532         vm_map_t map = &vm->vm_map;
 2533         vm_offset_t    end;
 2534         int      grow_amount;
 2535         int      rv;
 2536         int      is_procstack;
 2537 
 2538         GIANT_REQUIRED;
 2539         
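              /*
               * The map is scanned under a read lock; when a modification is
               * required, vm_map_lock_upgrade() is used to convert it to an
               * exclusive lock.  A nonzero return indicates the upgrade could
               * not be done without dropping the lock, so the whole check is
               * redone from Retry.
               */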
 2540 Retry:
 2541         vm_map_lock_read(map);
 2542 
 2543         /* If addr is already in the entry range, no need to grow. */
 2544         if (vm_map_lookup_entry(map, addr, &prev_entry)) {
 2545                 vm_map_unlock_read(map);
 2546                 return (KERN_SUCCESS);
 2547         }
 2548 
 2549         if ((stack_entry = prev_entry->next) == &map->header) {
 2550                 vm_map_unlock_read(map);
 2551                 return (KERN_SUCCESS);
 2552         } 
 2553         if (prev_entry == &map->header) 
 2554                 end = stack_entry->start - stack_entry->avail_ssize;
 2555         else
 2556                 end = prev_entry->end;
 2557 
 2558         /* This next test mimics the old grow function in vm_machdep.c.
 2559          * It really doesn't quite make sense, but we do it anyway
 2560          * for compatibility.
 2561          *
 2562          * If the stack is not growable, return success.  This signals
 2563          * the caller to proceed as it normally would with ordinary vm.
 2564          */
 2565         if (stack_entry->avail_ssize < 1 ||
 2566             addr >= stack_entry->start ||
 2567             addr <  stack_entry->start - stack_entry->avail_ssize) {
 2568                 vm_map_unlock_read(map);
 2569                 return (KERN_SUCCESS);
 2570         } 
 2571         
 2572         /* Find the minimum grow amount */
 2573         grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
 2574         if (grow_amount > stack_entry->avail_ssize) {
 2575                 vm_map_unlock_read(map);
 2576                 return (KERN_NO_SPACE);
 2577         }
 2578 
 2579         /* If there is no longer enough space between the entries,
 2580          * it is no go; adjust the available space.  Note: this
 2581          * should only happen if the user has mapped into the
 2582          * stack area after the stack was created, and is
 2583          * probably an error.
 2584          *
 2585          * This also effectively destroys any guard page the user
 2586          * might have intended by limiting the stack size.
 2587          */
 2588         if (grow_amount > stack_entry->start - end) {
 2589                 if (vm_map_lock_upgrade(map))
 2590                         goto Retry;
 2591 
 2592                 stack_entry->avail_ssize = stack_entry->start - end;
 2593 
 2594                 vm_map_unlock(map);
 2595                 return (KERN_NO_SPACE);
 2596         }
 2597 
 2598         is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
 2599 
 2600         /* If this is the main process stack, see if we're over the 
 2601          * stack limit.
 2602          */
 2603         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
 2604                              p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
 2605                 vm_map_unlock_read(map);
 2606                 return (KERN_NO_SPACE);
 2607         }
 2608 
 2609         /* Round up the grow amount to a multiple of sgrowsiz */
 2610         grow_amount = roundup (grow_amount, sgrowsiz);
 2611         if (grow_amount > stack_entry->avail_ssize) {
 2612                 grow_amount = stack_entry->avail_ssize;
 2613         }
 2614         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
 2615                              p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
 2616                 grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
 2617                               ctob(vm->vm_ssize);
 2618         }
 2619 
 2620         /* If we would blow our VMEM resource limit, no go */
 2621         if (map->size + grow_amount >
 2622             curthread->td_proc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
 2623                 vm_map_unlock_read(map);
 2624                 return (KERN_NO_SPACE);
 2625         }
 2626 
 2627         if (vm_map_lock_upgrade(map))
 2628                 goto Retry;
 2629 
 2630         /* Get the preliminary new entry start value */
 2631         addr = stack_entry->start - grow_amount;
 2632 
 2633         /* If this puts us into the previous entry, cut back our growth
 2634          * to the available space.  Also, see the note above.
 2635          */
 2636         if (addr < end) {
 2637                 stack_entry->avail_ssize = stack_entry->start - end;
 2638                 addr = end;
 2639         }
 2640 
 2641         rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
 2642             p->p_sysent->sv_stackprot, VM_PROT_ALL, 0);
 2643 
 2644         /* Adjust the available stack space by the amount we grew. */
 2645         if (rv == KERN_SUCCESS) {
 2646                 if (prev_entry != &map->header)
 2647                         vm_map_clip_end(map, prev_entry, addr);
 2648                 new_stack_entry = prev_entry->next;
 2649                 if (new_stack_entry->end   != stack_entry->start  ||
 2650                     new_stack_entry->start != addr)
 2651                         panic ("Bad stack grow start/end in new stack entry");
 2652                 else {
 2653                         new_stack_entry->avail_ssize = stack_entry->avail_ssize -
 2654                                                         (new_stack_entry->end -
 2655                                                          new_stack_entry->start);
 2656                         if (is_procstack)
 2657                                 vm->vm_ssize += btoc(new_stack_entry->end -
 2658                                                      new_stack_entry->start);
 2659                 }
 2660         }
 2661 
 2662         vm_map_unlock(map);
 2663         return (rv);
 2664 }
 2665 
 2666 /*
 2667  * Unshare the specified VM space for exec.  If other processes are
 2668  * mapped to it, then create a new one.  The new vmspace is empty.
 2669  */
 2670 void
 2671 vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser)
 2672 {
 2673         struct vmspace *oldvmspace = p->p_vmspace;
 2674         struct vmspace *newvmspace;
 2675 
 2676         GIANT_REQUIRED;
 2677         newvmspace = vmspace_alloc(minuser, maxuser);
 2678         bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
 2679             (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
 2680         /*
 2681          * This code is written like this for prototype purposes.  The
 2682          * goal is to avoid running down the vmspace here, but let the
 2683          * other processes that are still using the vmspace finally
 2684          * run it down.  Even though there is little or no chance of blocking
 2685          * here, it is a good idea to keep this form for future mods.
 2686          */
 2687         p->p_vmspace = newvmspace;
 2688         pmap_pinit2(vmspace_pmap(newvmspace));
 2689         vmspace_free(oldvmspace);
 2690         if (p == curthread->td_proc)            /* XXXKSE ? */
 2691                 pmap_activate(curthread);
 2692 }
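
Editor's note: the bcopy() in vmspace_exec() copies everything in struct vmspace from the vm_startcopy marker through the end of the structure, leaving the fields before the marker (reference counts, the map itself) untouched. Below is a small, self-contained sketch of that idiom with a made-up structure; only the pointer arithmetic mirrors the source.

#include <string.h>

/* Hypothetical structure; only the copy idiom matches vmspace_exec(). */
struct demo_space {
        int     refcnt;         /* identity/lifetime state: not copied */
        /* Everything from copy_start to the end of the struct is copied. */
        long    copy_start;     /* first field of the copied region */
        long    stats[4];
        long    limits[2];
};

static void
demo_copy_tail(struct demo_space *dst, const struct demo_space *src)
{
        /*
         * (char *)(dst + 1) points just past the end of *dst, so the
         * length covers copy_start through the structure's last byte,
         * like (caddr_t)(newvmspace + 1) - (caddr_t)&newvmspace->vm_startcopy.
         */
        memcpy(&dst->copy_start, &src->copy_start,
            (size_t)((char *)(dst + 1) - (char *)&dst->copy_start));
}
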
 2693 
 2694 /*
 2695  * Unshare the specified VM space to force copy-on-write.  This
 2696  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
 2697  */
 2698 void
 2699 vmspace_unshare(struct proc *p)
 2700 {
 2701         struct vmspace *oldvmspace = p->p_vmspace;
 2702         struct vmspace *newvmspace;
 2703 
 2704         GIANT_REQUIRED;
 2705         if (oldvmspace->vm_refcnt == 1)
 2706                 return;
 2707         newvmspace = vmspace_fork(oldvmspace);
 2708         p->p_vmspace = newvmspace;
 2709         pmap_pinit2(vmspace_pmap(newvmspace));
 2710         vmspace_free(oldvmspace);
 2711         if (p == curthread->td_proc)            /* XXXKSE ? */
 2712                 pmap_activate(curthread);
 2713 }
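
Editor's note: a hypothetical caller sketch (not part of this file) of the case the comment above describes: an rfork(2) request with neither RFPROC nor RFMEM set keeps the current process but must stop sharing its address space, so the kernel hands it a private copy-on-write copy via vmspace_unshare(). The usual kernel headers (struct proc, the RFPROC/RFMEM flags from <sys/unistd.h>) are assumed.

/* Sketch only; the function name is illustrative. */
static void
demo_rfork_unshare(struct proc *p, int flags)
{
        if ((flags & (RFPROC | RFMEM)) == 0) {
                /* No new process and no shared memory: force a COW copy. */
                vmspace_unshare(p);
        }
}
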
 2714 
 2715 /*
 2716  *      vm_map_lookup:
 2717  *
 2718  *      Finds the VM object, offset, and
 2719  *      protection for a given virtual address in the
 2720  *      specified map, assuming a page fault of the
 2721  *      type specified.
 2722  *
 2723  *      Leaves the map in question locked for read; return
 2724  *      values are guaranteed until a vm_map_lookup_done
 2725  *      call is performed.  Note that the map argument
 2726  *      is in/out; the returned map must be used in
 2727  *      the call to vm_map_lookup_done.
 2728  *
 2729  *      A handle (out_entry) is returned for use in
 2730  *      vm_map_lookup_done, to make that fast.
 2731  *
 2732  *      If a lookup is requested with "write protection"
 2733  *      specified, the map may be changed to perform virtual
 2734  *      copying operations, although the data referenced will
 2735  *      remain the same.
 2736  */
 2737 int
 2738 vm_map_lookup(vm_map_t *var_map,                /* IN/OUT */
 2739               vm_offset_t vaddr,
 2740               vm_prot_t fault_typea,
 2741               vm_map_entry_t *out_entry,        /* OUT */
 2742               vm_object_t *object,              /* OUT */
 2743               vm_pindex_t *pindex,              /* OUT */
 2744               vm_prot_t *out_prot,              /* OUT */
 2745               boolean_t *wired)                 /* OUT */
 2746 {
 2747         vm_map_entry_t entry;
 2748         vm_map_t map = *var_map;
 2749         vm_prot_t prot;
 2750         vm_prot_t fault_type = fault_typea;
 2751 
 2752 RetryLookup:;
 2753         /*
 2754          * Lookup the faulting address.
 2755          */
 2756 
 2757         vm_map_lock_read(map);
 2758 #define RETURN(why) \
 2759                 { \
 2760                 vm_map_unlock_read(map); \
 2761                 return (why); \
 2762                 }
 2763 
 2764         /*
 2765          * If the map has an interesting hint, try it before calling the
 2766          * full-blown lookup routine.
 2767          */
 2768         entry = map->root;
 2769         *out_entry = entry;
 2770         if (entry == NULL ||
 2771             (vaddr < entry->start) || (vaddr >= entry->end)) {
 2772                 /*
 2773                  * Entry was either not a valid hint, or the vaddr was not
 2774                  * contained in the entry, so do a full lookup.
 2775                  */
 2776                 if (!vm_map_lookup_entry(map, vaddr, out_entry))
 2777                         RETURN(KERN_INVALID_ADDRESS);
 2778 
 2779                 entry = *out_entry;
 2780         }
 2781         
 2782         /*
 2783          * Handle submaps.
 2784          */
 2785         if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
 2786                 vm_map_t old_map = map;
 2787 
 2788                 *var_map = map = entry->object.sub_map;
 2789                 vm_map_unlock_read(old_map);
 2790                 goto RetryLookup;
 2791         }
 2792 
 2793         /*
 2794          * Check whether this task is allowed to have this page.
 2795          * Note the special case for MAP_ENTRY_COW
 2796          * pages with an override.  This is to implement a forced
 2797          * COW for debuggers.
 2798          */
 2799         if (fault_type & VM_PROT_OVERRIDE_WRITE)
 2800                 prot = entry->max_protection;
 2801         else
 2802                 prot = entry->protection;
 2803         fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
 2804         if ((fault_type & prot) != fault_type) {
 2805                 RETURN(KERN_PROTECTION_FAILURE);
 2806         }
 2807         if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
 2808             (entry->eflags & MAP_ENTRY_COW) &&
 2809             (fault_type & VM_PROT_WRITE) &&
 2810             (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
 2811                 RETURN(KERN_PROTECTION_FAILURE);
 2812         }
 2813 
 2814         /*
 2815          * If this page is not pageable, we have to get it for all possible
 2816          * accesses.
 2817          */
 2818         *wired = (entry->wired_count != 0);
 2819         if (*wired)
 2820                 prot = fault_type = entry->protection;
 2821 
 2822         /*
 2823          * If the entry was copy-on-write, we either copy it now or demote access:
 2824          */
 2825         if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
 2826                 /*
 2827                  * If we want to write the page, we may as well handle that
 2828                  * now since we've got the map locked.
 2829                  *
 2830                  * If we don't need to write the page, we just demote the
 2831                  * permissions allowed.
 2832                  */
 2833                 if (fault_type & VM_PROT_WRITE) {
 2834                         /*
 2835                          * Make a new object, and place it in the object
 2836                          * chain.  Note that no new references have appeared
 2837                          * -- one just moved from the map to the new
 2838                          * object.
 2839                          */
 2840                         if (vm_map_lock_upgrade(map))
 2841                                 goto RetryLookup;
 2842 
 2843                         vm_object_shadow(
 2844                             &entry->object.vm_object,
 2845                             &entry->offset,
 2846                             atop(entry->end - entry->start));
 2847                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 2848 
 2849                         vm_map_lock_downgrade(map);
 2850                 } else {
 2851                         /*
 2852                          * We're attempting to read a copy-on-write page --
 2853                          * don't allow writes.
 2854                          */
 2855                         prot &= ~VM_PROT_WRITE;
 2856                 }
 2857         }
 2858 
 2859         /*
 2860          * Create an object if necessary.
 2861          */
 2862         if (entry->object.vm_object == NULL &&
 2863             !map->system_map) {
 2864                 if (vm_map_lock_upgrade(map)) 
 2865                         goto RetryLookup;
 2866                 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
 2867                     atop(entry->end - entry->start));
 2868                 entry->offset = 0;
 2869                 vm_map_lock_downgrade(map);
 2870         }
 2871 
 2872         /*
 2873          * Return the object/offset from this entry.  If the entry was
 2874          * copy-on-write or empty, it has been fixed up.
 2875          */
 2876         *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
 2877         *object = entry->object.vm_object;
 2878 
 2879         /*
 2880          * Return the protection computed for this lookup.
 2881          */
 2882         *out_prot = prot;
 2883         return (KERN_SUCCESS);
 2884 
 2885 #undef  RETURN
 2886 }
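
Editor's note: a hypothetical caller sketch (not part of this file) of the contract described in the vm_map_lookup() header comment: the map argument is in/out (a submap may be substituted), the map comes back read-locked, and the returned map/entry pair must be passed to vm_map_lookup_done() to release that lock. The kernel VM headers are assumed to be included; the function name is illustrative.

static int
demo_fault_lookup(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type)
{
        vm_map_entry_t entry;
        vm_object_t object;
        vm_pindex_t pindex;
        vm_prot_t prot;
        boolean_t wired;
        int rv;

        rv = vm_map_lookup(&map, vaddr, fault_type, &entry, &object,
            &pindex, &prot, &wired);
        if (rv != KERN_SUCCESS)
                return (rv);

        /* ... resolve the fault from (object, pindex) under 'prot' ... */

        /*
         * Drop the read lock taken by vm_map_lookup().  Use the map value
         * it handed back, which may now refer to a submap.
         */
        vm_map_lookup_done(map, entry);
        return (KERN_SUCCESS);
}
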
 2887 
 2888 /*
 2889  *      vm_map_lookup_done:
 2890  *
 2891  *      Releases locks acquired by a vm_map_lookup
 2892  *      (according to the handle returned by that lookup).
 2893  */
 2894 void
 2895 vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
 2896 {
 2897         /*
 2898          * Unlock the main-level map
 2899          */
 2900         vm_map_unlock_read(map);
 2901 }
 2902 
 2903 #include "opt_ddb.h"
 2904 #ifdef DDB
 2905 #include <sys/kernel.h>
 2906 
 2907 #include <ddb/ddb.h>
 2908 
 2909 /*
 2910  *      vm_map_print:   [ debug ]
 2911  */
 2912 DB_SHOW_COMMAND(map, vm_map_print)
 2913 {
 2914         static int nlines;
 2915         /* XXX convert args. */
 2916         vm_map_t map = (vm_map_t)addr;
 2917         boolean_t full = have_addr;
 2918 
 2919         vm_map_entry_t entry;
 2920 
 2921         db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
 2922             (void *)map,
 2923             (void *)map->pmap, map->nentries, map->timestamp);
 2924         nlines++;
 2925 
 2926         if (!full && db_indent)
 2927                 return;
 2928 
 2929         db_indent += 2;
 2930         for (entry = map->header.next; entry != &map->header;
 2931             entry = entry->next) {
 2932                 db_iprintf("map entry %p: start=%p, end=%p\n",
 2933                     (void *)entry, (void *)entry->start, (void *)entry->end);
 2934                 nlines++;
 2935                 {
 2936                         static char *inheritance_name[4] =
 2937                         {"share", "copy", "none", "donate_copy"};
 2938 
 2939                         db_iprintf(" prot=%x/%x/%s",
 2940                             entry->protection,
 2941                             entry->max_protection,
 2942                             inheritance_name[(int)(unsigned char)entry->inheritance]);
 2943                         if (entry->wired_count != 0)
 2944                                 db_printf(", wired");
 2945                 }
 2946                 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
 2947                         db_printf(", share=%p, offset=0x%jx\n",
 2948                             (void *)entry->object.sub_map,
 2949                             (uintmax_t)entry->offset);
 2950                         nlines++;
 2951                         if ((entry->prev == &map->header) ||
 2952                             (entry->prev->object.sub_map !=
 2953                                 entry->object.sub_map)) {
 2954                                 db_indent += 2;
 2955                                 vm_map_print((db_expr_t)(intptr_t)
 2956                                              entry->object.sub_map,
 2957                                              full, 0, (char *)0);
 2958                                 db_indent -= 2;
 2959                         }
 2960                 } else {
 2961                         db_printf(", object=%p, offset=0x%jx",
 2962                             (void *)entry->object.vm_object,
 2963                             (uintmax_t)entry->offset);
 2964                         if (entry->eflags & MAP_ENTRY_COW)
 2965                                 db_printf(", copy (%s)",
 2966                                     (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
 2967                         db_printf("\n");
 2968                         nlines++;
 2969 
 2970                         if ((entry->prev == &map->header) ||
 2971                             (entry->prev->object.vm_object !=
 2972                                 entry->object.vm_object)) {
 2973                                 db_indent += 2;
 2974                                 vm_object_print((db_expr_t)(intptr_t)
 2975                                                 entry->object.vm_object,
 2976                                                 full, 0, (char *)0);
 2977                                 nlines += 4;
 2978                                 db_indent -= 2;
 2979                         }
 2980                 }
 2981         }
 2982         db_indent -= 2;
 2983         if (db_indent == 0)
 2984                 nlines = 0;
 2985 }
 2986 
 2987 
 2988 DB_SHOW_COMMAND(procvm, procvm)
 2989 {
 2990         struct proc *p;
 2991 
 2992         if (have_addr) {
 2993                 p = (struct proc *) addr;
 2994         } else {
 2995                 p = curproc;
 2996         }
 2997 
 2998         db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
 2999             (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
 3000             (void *)vmspace_pmap(p->p_vmspace));
 3001 
 3002         vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
 3003 }
 3004 
 3005 #endif /* DDB */
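
Editor's note: DB_SHOW_COMMAND(map, vm_map_print) and DB_SHOW_COMMAND(procvm, procvm) above register the "show map" and "show procvm" commands with the in-kernel debugger when the kernel is built with 'options DDB'. An illustrative session (the address is a placeholder):

        db> show procvm
        db> show map 0xc1234567

For "show map", the address is that of the vm_map structure itself; supplying an address sets have_addr, which selects the full per-entry listing printed by vm_map_print() above.
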
