The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_map.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * The Mach Operating System project at Carnegie-Mellon University.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 4. Neither the name of the University nor the names of its contributors
   17  *    may be used to endorse or promote products derived from this software
   18  *    without specific prior written permission.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   30  * SUCH DAMAGE.
   31  *
   32  *      from: @(#)vm_map.c      8.3 (Berkeley) 1/12/94
   33  *
   34  *
   35  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   36  * All rights reserved.
   37  *
   38  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
   39  *
   40  * Permission to use, copy, modify and distribute this software and
   41  * its documentation is hereby granted, provided that both the copyright
   42  * notice and this permission notice appear in all copies of the
   43  * software, derivative works or modified versions, and any portions
   44  * thereof, and that both notices appear in supporting documentation.
   45  *
   46  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   47  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   48  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   49  *
   50  * Carnegie Mellon requests users of this software to return to
   51  *
   52  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   53  *  School of Computer Science
   54  *  Carnegie Mellon University
   55  *  Pittsburgh PA 15213-3890
   56  *
   57  * any improvements or extensions that they make and grant Carnegie the
   58  * rights to redistribute these changes.
   59  */
   60 
   61 /*
   62  *      Virtual memory mapping module.
   63  */
   64 
   65 #include <sys/cdefs.h>
   66 __FBSDID("$FreeBSD$");
   67 
   68 #include <sys/param.h>
   69 #include <sys/systm.h>
   70 #include <sys/kernel.h>
   71 #include <sys/ktr.h>
   72 #include <sys/lock.h>
   73 #include <sys/mutex.h>
   74 #include <sys/proc.h>
   75 #include <sys/vmmeter.h>
   76 #include <sys/mman.h>
   77 #include <sys/vnode.h>
   78 #include <sys/racct.h>
   79 #include <sys/resourcevar.h>
   80 #include <sys/rwlock.h>
   81 #include <sys/file.h>
   82 #include <sys/sysctl.h>
   83 #include <sys/sysent.h>
   84 #include <sys/shm.h>
   85 
   86 #include <vm/vm.h>
   87 #include <vm/vm_param.h>
   88 #include <vm/pmap.h>
   89 #include <vm/vm_map.h>
   90 #include <vm/vm_page.h>
   91 #include <vm/vm_object.h>
   92 #include <vm/vm_pager.h>
   93 #include <vm/vm_kern.h>
   94 #include <vm/vm_extern.h>
   95 #include <vm/vnode_pager.h>
   96 #include <vm/swap_pager.h>
   97 #include <vm/uma.h>
   98 
   99 /*
  100  *      Virtual memory maps provide for the mapping, protection,
  101  *      and sharing of virtual memory objects.  In addition,
  102  *      this module provides for an efficient virtual copy of
  103  *      memory from one map to another.
  104  *
  105  *      Synchronization is required prior to most operations.
  106  *
  107  *      Maps consist of an ordered doubly-linked list of simple
  108  *      entries; a self-adjusting binary search tree of these
  109  *      entries is used to speed up lookups.
  110  *
  111  *      Since portions of maps are specified by start/end addresses,
  112  *      which may not align with existing map entries, all
  113  *      routines merely "clip" entries to these start/end values.
  114  *      [That is, an entry is split into two, bordering at a
  115  *      start or end value.]  Note that these clippings may not
  116  *      always be necessary (as the two resulting entries are then
  117  *      not changed); however, the clipping is done for convenience.
  118  *
  119  *      As mentioned above, virtual copy operations are performed
  120  *      by copying VM object references from one map to
  121  *      another, and then marking both regions as copy-on-write.
  122  */
  123 
  124 static struct mtx map_sleep_mtx;
  125 static uma_zone_t mapentzone;
  126 static uma_zone_t kmapentzone;
  127 static uma_zone_t mapzone;
  128 static uma_zone_t vmspace_zone;
  129 static int vmspace_zinit(void *mem, int size, int flags);
  130 static int vm_map_zinit(void *mem, int ize, int flags);
  131 static void _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min,
  132     vm_offset_t max);
  133 static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map);
  134 static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry);
  135 static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry);
  136 static int vm_map_growstack(vm_map_t map, vm_offset_t addr,
  137     vm_map_entry_t gap_entry);
  138 #ifdef INVARIANTS
  139 static void vm_map_zdtor(void *mem, int size, void *arg);
  140 static void vmspace_zdtor(void *mem, int size, void *arg);
  141 #endif
  142 static int vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos,
  143     vm_size_t max_ssize, vm_size_t growsize, vm_prot_t prot, vm_prot_t max,
  144     int cow);
  145 static void vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry,
  146     vm_offset_t failed_addr);
  147 
  148 #define ENTRY_CHARGED(e) ((e)->cred != NULL || \
  149     ((e)->object.vm_object != NULL && (e)->object.vm_object->cred != NULL && \
  150      !((e)->eflags & MAP_ENTRY_NEEDS_COPY)))
  151 
  152 /* 
  153  * PROC_VMSPACE_{UN,}LOCK() can be a noop as long as vmspaces are type
  154  * stable.
  155  */
  156 #define PROC_VMSPACE_LOCK(p) do { } while (0)
  157 #define PROC_VMSPACE_UNLOCK(p) do { } while (0)
  158 
  159 /*
  160  *      VM_MAP_RANGE_CHECK:     [ internal use only ]
  161  *
  162  *      Asserts that the starting and ending region
  163  *      addresses fall within the valid range of the map.
  164  */
  165 #define VM_MAP_RANGE_CHECK(map, start, end)             \
  166                 {                                       \
  167                 if (start < vm_map_min(map))            \
  168                         start = vm_map_min(map);        \
  169                 if (end > vm_map_max(map))              \
  170                         end = vm_map_max(map);          \
  171                 if (start > end)                        \
  172                         start = end;                    \
  173                 }
  174 
  175 /*
  176  *      vm_map_startup:
  177  *
  178  *      Initialize the vm_map module.  Must be called before
  179  *      any other vm_map routines.
  180  *
  181  *      Map and entry structures are allocated from the general
  182  *      purpose memory pool with some exceptions:
  183  *
  184  *      - The kernel map and kmem submap are allocated statically.
  185  *      - Kernel map entries are allocated out of a static pool.
  186  *
  187  *      These restrictions are necessary since malloc() uses the
  188  *      maps and requires map entries.
  189  */
  190 
  191 void
  192 vm_map_startup(void)
  193 {
  194         mtx_init(&map_sleep_mtx, "vm map sleep mutex", NULL, MTX_DEF);
  195         mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL,
  196 #ifdef INVARIANTS
  197             vm_map_zdtor,
  198 #else
  199             NULL,
  200 #endif
  201             vm_map_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
  202         uma_prealloc(mapzone, MAX_KMAP);
  203         kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry),
  204             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
  205             UMA_ZONE_MTXCLASS | UMA_ZONE_VM);
  206         mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry),
  207             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
  208         vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL,
  209 #ifdef INVARIANTS
  210             vmspace_zdtor,
  211 #else
  212             NULL,
  213 #endif
  214             vmspace_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
  215 }
  216 
  217 static int
  218 vmspace_zinit(void *mem, int size, int flags)
  219 {
  220         struct vmspace *vm;
  221 
  222         vm = (struct vmspace *)mem;
  223 
  224         vm->vm_map.pmap = NULL;
  225         (void)vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map), flags);
  226         PMAP_LOCK_INIT(vmspace_pmap(vm));
  227         return (0);
  228 }
  229 
  230 static int
  231 vm_map_zinit(void *mem, int size, int flags)
  232 {
  233         vm_map_t map;
  234 
  235         map = (vm_map_t)mem;
  236         memset(map, 0, sizeof(*map));
  237         mtx_init(&map->system_mtx, "vm map (system)", NULL, MTX_DEF | MTX_DUPOK);
  238         sx_init(&map->lock, "vm map (user)");
  239         return (0);
  240 }
  241 
  242 #ifdef INVARIANTS
  243 static void
  244 vmspace_zdtor(void *mem, int size, void *arg)
  245 {
  246         struct vmspace *vm;
  247 
  248         vm = (struct vmspace *)mem;
  249 
  250         vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg);
  251 }
  252 static void
  253 vm_map_zdtor(void *mem, int size, void *arg)
  254 {
  255         vm_map_t map;
  256 
  257         map = (vm_map_t)mem;
  258         KASSERT(map->nentries == 0,
  259             ("map %p nentries == %d on free.",
  260             map, map->nentries));
  261         KASSERT(map->size == 0,
  262             ("map %p size == %lu on free.",
  263             map, (unsigned long)map->size));
  264 }
  265 #endif  /* INVARIANTS */
  266 
  267 /*
  268  * Allocate a vmspace structure, including a vm_map and pmap,
  269  * and initialize those structures.  The refcnt is set to 1.
  270  *
  271  * If 'pinit' is NULL then the embedded pmap is initialized via pmap_pinit().
  272  */
  273 struct vmspace *
  274 vmspace_alloc(vm_offset_t min, vm_offset_t max, pmap_pinit_t pinit)
  275 {
  276         struct vmspace *vm;
  277 
  278         vm = uma_zalloc(vmspace_zone, M_WAITOK);
  279 
  280         KASSERT(vm->vm_map.pmap == NULL, ("vm_map.pmap must be NULL"));
  281 
  282         if (pinit == NULL)
  283                 pinit = &pmap_pinit;
  284 
  285         if (!pinit(vmspace_pmap(vm))) {
  286                 uma_zfree(vmspace_zone, vm);
  287                 return (NULL);
  288         }
  289         CTR1(KTR_VM, "vmspace_alloc: %p", vm);
  290         _vm_map_init(&vm->vm_map, vmspace_pmap(vm), min, max);
  291         vm->vm_refcnt = 1;
  292         vm->vm_shm = NULL;
  293         vm->vm_swrss = 0;
  294         vm->vm_tsize = 0;
  295         vm->vm_dsize = 0;
  296         vm->vm_ssize = 0;
  297         vm->vm_taddr = 0;
  298         vm->vm_daddr = 0;
  299         vm->vm_maxsaddr = 0;
  300         return (vm);
  301 }
  302 
  303 #ifdef RACCT
  304 static void
  305 vmspace_container_reset(struct proc *p)
  306 {
  307 
  308         PROC_LOCK(p);
  309         racct_set(p, RACCT_DATA, 0);
  310         racct_set(p, RACCT_STACK, 0);
  311         racct_set(p, RACCT_RSS, 0);
  312         racct_set(p, RACCT_MEMLOCK, 0);
  313         racct_set(p, RACCT_VMEM, 0);
  314         PROC_UNLOCK(p);
  315 }
  316 #endif
  317 
  318 static inline void
  319 vmspace_dofree(struct vmspace *vm)
  320 {
  321 
  322         CTR1(KTR_VM, "vmspace_free: %p", vm);
  323 
  324         /*
  325          * Make sure any SysV shm is freed, it might not have been in
  326          * exit1().
  327          */
  328         shmexit(vm);
  329 
  330         /*
  331          * Lock the map, to wait out all other references to it.
  332          * Delete all of the mappings and pages they hold, then call
  333          * the pmap module to reclaim anything left.
  334          */
  335         (void)vm_map_remove(&vm->vm_map, vm->vm_map.min_offset,
  336             vm->vm_map.max_offset);
  337 
  338         pmap_release(vmspace_pmap(vm));
  339         vm->vm_map.pmap = NULL;
  340         uma_zfree(vmspace_zone, vm);
  341 }
  342 
  343 void
  344 vmspace_free(struct vmspace *vm)
  345 {
  346 
  347         WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
  348             "vmspace_free() called with non-sleepable lock held");
  349 
  350         if (vm->vm_refcnt == 0)
  351                 panic("vmspace_free: attempt to free already freed vmspace");
  352 
  353         if (atomic_fetchadd_int(&vm->vm_refcnt, -1) == 1)
  354                 vmspace_dofree(vm);
  355 }
  356 
  357 void
  358 vmspace_exitfree(struct proc *p)
  359 {
  360         struct vmspace *vm;
  361 
  362         PROC_VMSPACE_LOCK(p);
  363         vm = p->p_vmspace;
  364         p->p_vmspace = NULL;
  365         PROC_VMSPACE_UNLOCK(p);
  366         KASSERT(vm == &vmspace0, ("vmspace_exitfree: wrong vmspace"));
  367         vmspace_free(vm);
  368 }
  369 
  370 void
  371 vmspace_exit(struct thread *td)
  372 {
  373         int refcnt;
  374         struct vmspace *vm;
  375         struct proc *p;
  376 
  377         /*
  378          * Release user portion of address space.
  379          * This releases references to vnodes,
  380          * which could cause I/O if the file has been unlinked.
  381          * Need to do this early enough that we can still sleep.
  382          *
  383          * The last exiting process to reach this point releases as
  384          * much of the environment as it can. vmspace_dofree() is the
  385          * slower fallback in case another process had a temporary
  386          * reference to the vmspace.
  387          */
  388 
  389         p = td->td_proc;
  390         vm = p->p_vmspace;
  391         atomic_add_int(&vmspace0.vm_refcnt, 1);
  392         do {
  393                 refcnt = vm->vm_refcnt;
  394                 if (refcnt > 1 && p->p_vmspace != &vmspace0) {
  395                         /* Switch now since other proc might free vmspace */
  396                         PROC_VMSPACE_LOCK(p);
  397                         p->p_vmspace = &vmspace0;
  398                         PROC_VMSPACE_UNLOCK(p);
  399                         pmap_activate(td);
  400                 }
  401         } while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt - 1));
  402         if (refcnt == 1) {
  403                 if (p->p_vmspace != vm) {
  404                         /* vmspace not yet freed, switch back */
  405                         PROC_VMSPACE_LOCK(p);
  406                         p->p_vmspace = vm;
  407                         PROC_VMSPACE_UNLOCK(p);
  408                         pmap_activate(td);
  409                 }
  410                 pmap_remove_pages(vmspace_pmap(vm));
  411                 /* Switch now since this proc will free vmspace */
  412                 PROC_VMSPACE_LOCK(p);
  413                 p->p_vmspace = &vmspace0;
  414                 PROC_VMSPACE_UNLOCK(p);
  415                 pmap_activate(td);
  416                 vmspace_dofree(vm);
  417         }
  418 #ifdef RACCT
  419         if (racct_enable)
  420                 vmspace_container_reset(p);
  421 #endif
  422 }
  423 
  424 /* Acquire reference to vmspace owned by another process. */
  425 
  426 struct vmspace *
  427 vmspace_acquire_ref(struct proc *p)
  428 {
  429         struct vmspace *vm;
  430         int refcnt;
  431 
  432         PROC_VMSPACE_LOCK(p);
  433         vm = p->p_vmspace;
  434         if (vm == NULL) {
  435                 PROC_VMSPACE_UNLOCK(p);
  436                 return (NULL);
  437         }
  438         do {
  439                 refcnt = vm->vm_refcnt;
  440                 if (refcnt <= 0) {      /* Avoid 0->1 transition */
  441                         PROC_VMSPACE_UNLOCK(p);
  442                         return (NULL);
  443                 }
  444         } while (!atomic_cmpset_int(&vm->vm_refcnt, refcnt, refcnt + 1));
  445         if (vm != p->p_vmspace) {
  446                 PROC_VMSPACE_UNLOCK(p);
  447                 vmspace_free(vm);
  448                 return (NULL);
  449         }
  450         PROC_VMSPACE_UNLOCK(p);
  451         return (vm);
  452 }
  453 
  454 void
  455 _vm_map_lock(vm_map_t map, const char *file, int line)
  456 {
  457 
  458         if (map->system_map)
  459                 mtx_lock_flags_(&map->system_mtx, 0, file, line);
  460         else
  461                 sx_xlock_(&map->lock, file, line);
  462         map->timestamp++;
  463 }
  464 
  465 static void
  466 vm_map_process_deferred(void)
  467 {
  468         struct thread *td;
  469         vm_map_entry_t entry, next;
  470         vm_object_t object;
  471 
  472         td = curthread;
  473         entry = td->td_map_def_user;
  474         td->td_map_def_user = NULL;
  475         while (entry != NULL) {
  476                 next = entry->next;
  477                 if ((entry->eflags & MAP_ENTRY_VN_WRITECNT) != 0) {
  478                         /*
  479                          * Decrement the object's writemappings and
  480                          * possibly the vnode's v_writecount.
  481                          */
  482                         KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0,
  483                             ("Submap with writecount"));
  484                         object = entry->object.vm_object;
  485                         KASSERT(object != NULL, ("No object for writecount"));
  486                         vnode_pager_release_writecount(object, entry->start,
  487                             entry->end);
  488                 }
  489                 vm_map_entry_deallocate(entry, FALSE);
  490                 entry = next;
  491         }
  492 }
  493 
  494 void
  495 _vm_map_unlock(vm_map_t map, const char *file, int line)
  496 {
  497 
  498         if (map->system_map)
  499                 mtx_unlock_flags_(&map->system_mtx, 0, file, line);
  500         else {
  501                 sx_xunlock_(&map->lock, file, line);
  502                 vm_map_process_deferred();
  503         }
  504 }
  505 
  506 void
  507 _vm_map_lock_read(vm_map_t map, const char *file, int line)
  508 {
  509 
  510         if (map->system_map)
  511                 mtx_lock_flags_(&map->system_mtx, 0, file, line);
  512         else
  513                 sx_slock_(&map->lock, file, line);
  514 }
  515 
  516 void
  517 _vm_map_unlock_read(vm_map_t map, const char *file, int line)
  518 {
  519 
  520         if (map->system_map)
  521                 mtx_unlock_flags_(&map->system_mtx, 0, file, line);
  522         else {
  523                 sx_sunlock_(&map->lock, file, line);
  524                 vm_map_process_deferred();
  525         }
  526 }
  527 
  528 int
  529 _vm_map_trylock(vm_map_t map, const char *file, int line)
  530 {
  531         int error;
  532 
  533         error = map->system_map ?
  534             !mtx_trylock_flags_(&map->system_mtx, 0, file, line) :
  535             !sx_try_xlock_(&map->lock, file, line);
  536         if (error == 0)
  537                 map->timestamp++;
  538         return (error == 0);
  539 }
  540 
  541 int
  542 _vm_map_trylock_read(vm_map_t map, const char *file, int line)
  543 {
  544         int error;
  545 
  546         error = map->system_map ?
  547             !mtx_trylock_flags_(&map->system_mtx, 0, file, line) :
  548             !sx_try_slock_(&map->lock, file, line);
  549         return (error == 0);
  550 }
  551 
  552 /*
  553  *      _vm_map_lock_upgrade:   [ internal use only ]
  554  *
  555  *      Tries to upgrade a read (shared) lock on the specified map to a write
  556  *      (exclusive) lock.  Returns the value "" if the upgrade succeeds and a
  557  *      non-zero value if the upgrade fails.  If the upgrade fails, the map is
  558  *      returned without a read or write lock held.
  559  *
  560  *      Requires that the map be read locked.
  561  */
  562 int
  563 _vm_map_lock_upgrade(vm_map_t map, const char *file, int line)
  564 {
  565         unsigned int last_timestamp;
  566 
  567         if (map->system_map) {
  568                 mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
  569         } else {
  570                 if (!sx_try_upgrade_(&map->lock, file, line)) {
  571                         last_timestamp = map->timestamp;
  572                         sx_sunlock_(&map->lock, file, line);
  573                         vm_map_process_deferred();
  574                         /*
  575                          * If the map's timestamp does not change while the
  576                          * map is unlocked, then the upgrade succeeds.
  577                          */
  578                         sx_xlock_(&map->lock, file, line);
  579                         if (last_timestamp != map->timestamp) {
  580                                 sx_xunlock_(&map->lock, file, line);
  581                                 return (1);
  582                         }
  583                 }
  584         }
  585         map->timestamp++;
  586         return (0);
  587 }
  588 
  589 void
  590 _vm_map_lock_downgrade(vm_map_t map, const char *file, int line)
  591 {
  592 
  593         if (map->system_map) {
  594                 mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
  595         } else
  596                 sx_downgrade_(&map->lock, file, line);
  597 }
  598 
  599 /*
  600  *      vm_map_locked:
  601  *
  602  *      Returns a non-zero value if the caller holds a write (exclusive) lock
  603  *      on the specified map and the value "" otherwise.
  604  */
  605 int
  606 vm_map_locked(vm_map_t map)
  607 {
  608 
  609         if (map->system_map)
  610                 return (mtx_owned(&map->system_mtx));
  611         else
  612                 return (sx_xlocked(&map->lock));
  613 }
  614 
  615 #ifdef INVARIANTS
  616 static void
  617 _vm_map_assert_locked(vm_map_t map, const char *file, int line)
  618 {
  619 
  620         if (map->system_map)
  621                 mtx_assert_(&map->system_mtx, MA_OWNED, file, line);
  622         else
  623                 sx_assert_(&map->lock, SA_XLOCKED, file, line);
  624 }
  625 
  626 #define VM_MAP_ASSERT_LOCKED(map) \
  627     _vm_map_assert_locked(map, LOCK_FILE, LOCK_LINE)
  628 #else
  629 #define VM_MAP_ASSERT_LOCKED(map)
  630 #endif
  631 
  632 /*
  633  *      _vm_map_unlock_and_wait:
  634  *
  635  *      Atomically releases the lock on the specified map and puts the calling
  636  *      thread to sleep.  The calling thread will remain asleep until either
  637  *      vm_map_wakeup() is performed on the map or the specified timeout is
  638  *      exceeded.
  639  *
  640  *      WARNING!  This function does not perform deferred deallocations of
  641  *      objects and map entries.  Therefore, the calling thread is expected to
  642  *      reacquire the map lock after reawakening and later perform an ordinary
  643  *      unlock operation, such as vm_map_unlock(), before completing its
  644  *      operation on the map.
  645  */
  646 int
  647 _vm_map_unlock_and_wait(vm_map_t map, int timo, const char *file, int line)
  648 {
  649 
  650         mtx_lock(&map_sleep_mtx);
  651         if (map->system_map)
  652                 mtx_unlock_flags_(&map->system_mtx, 0, file, line);
  653         else
  654                 sx_xunlock_(&map->lock, file, line);
  655         return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps",
  656             timo));
  657 }
  658 
  659 /*
  660  *      vm_map_wakeup:
  661  *
  662  *      Awaken any threads that have slept on the map using
  663  *      vm_map_unlock_and_wait().
  664  */
  665 void
  666 vm_map_wakeup(vm_map_t map)
  667 {
  668 
  669         /*
  670          * Acquire and release map_sleep_mtx to prevent a wakeup()
  671          * from being performed (and lost) between the map unlock
  672          * and the msleep() in _vm_map_unlock_and_wait().
  673          */
  674         mtx_lock(&map_sleep_mtx);
  675         mtx_unlock(&map_sleep_mtx);
  676         wakeup(&map->root);
  677 }
  678 
  679 void
  680 vm_map_busy(vm_map_t map)
  681 {
  682 
  683         VM_MAP_ASSERT_LOCKED(map);
  684         map->busy++;
  685 }
  686 
  687 void
  688 vm_map_unbusy(vm_map_t map)
  689 {
  690 
  691         VM_MAP_ASSERT_LOCKED(map);
  692         KASSERT(map->busy, ("vm_map_unbusy: not busy"));
  693         if (--map->busy == 0 && (map->flags & MAP_BUSY_WAKEUP)) {
  694                 vm_map_modflags(map, 0, MAP_BUSY_WAKEUP);
  695                 wakeup(&map->busy);
  696         }
  697 }
  698 
  699 void 
  700 vm_map_wait_busy(vm_map_t map)
  701 {
  702 
  703         VM_MAP_ASSERT_LOCKED(map);
  704         while (map->busy) {
  705                 vm_map_modflags(map, MAP_BUSY_WAKEUP, 0);
  706                 if (map->system_map)
  707                         msleep(&map->busy, &map->system_mtx, 0, "mbusy", 0);
  708                 else
  709                         sx_sleep(&map->busy, &map->lock, 0, "mbusy", 0);
  710         }
  711         map->timestamp++;
  712 }
  713 
  714 long
  715 vmspace_resident_count(struct vmspace *vmspace)
  716 {
  717         return pmap_resident_count(vmspace_pmap(vmspace));
  718 }
  719 
  720 /*
  721  *      vm_map_create:
  722  *
  723  *      Creates and returns a new empty VM map with
  724  *      the given physical map structure, and having
  725  *      the given lower and upper address bounds.
  726  */
  727 vm_map_t
  728 vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max)
  729 {
  730         vm_map_t result;
  731 
  732         result = uma_zalloc(mapzone, M_WAITOK);
  733         CTR1(KTR_VM, "vm_map_create: %p", result);
  734         _vm_map_init(result, pmap, min, max);
  735         return (result);
  736 }
  737 
  738 /*
  739  * Initialize an existing vm_map structure
  740  * such as that in the vmspace structure.
  741  */
  742 static void
  743 _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max)
  744 {
  745 
  746         map->header.next = map->header.prev = &map->header;
  747         map->needs_wakeup = FALSE;
  748         map->system_map = 0;
  749         map->pmap = pmap;
  750         map->min_offset = min;
  751         map->max_offset = max;
  752         map->flags = 0;
  753         map->root = NULL;
  754         map->timestamp = 0;
  755         map->busy = 0;
  756 }
  757 
  758 void
  759 vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max)
  760 {
  761 
  762         _vm_map_init(map, pmap, min, max);
  763         mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK);
  764         sx_init(&map->lock, "user map");
  765 }
  766 
  767 /*
  768  *      vm_map_entry_dispose:   [ internal use only ]
  769  *
  770  *      Inverse of vm_map_entry_create.
  771  */
  772 static void
  773 vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry)
  774 {
  775         uma_zfree(map->system_map ? kmapentzone : mapentzone, entry);
  776 }
  777 
  778 /*
  779  *      vm_map_entry_create:    [ internal use only ]
  780  *
  781  *      Allocates a VM map entry for insertion.
  782  *      No entry fields are filled in.
  783  */
  784 static vm_map_entry_t
  785 vm_map_entry_create(vm_map_t map)
  786 {
  787         vm_map_entry_t new_entry;
  788 
  789         if (map->system_map)
  790                 new_entry = uma_zalloc(kmapentzone, M_NOWAIT);
  791         else
  792                 new_entry = uma_zalloc(mapentzone, M_WAITOK);
  793         if (new_entry == NULL)
  794                 panic("vm_map_entry_create: kernel resources exhausted");
  795         return (new_entry);
  796 }
  797 
  798 /*
  799  *      vm_map_entry_set_behavior:
  800  *
  801  *      Set the expected access behavior, either normal, random, or
  802  *      sequential.
  803  */
  804 static inline void
  805 vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior)
  806 {
  807         entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) |
  808             (behavior & MAP_ENTRY_BEHAV_MASK);
  809 }
  810 
  811 /*
  812  *      vm_map_entry_set_max_free:
  813  *
  814  *      Set the max_free field in a vm_map_entry.
  815  */
  816 static inline void
  817 vm_map_entry_set_max_free(vm_map_entry_t entry)
  818 {
  819 
  820         entry->max_free = entry->adj_free;
  821         if (entry->left != NULL && entry->left->max_free > entry->max_free)
  822                 entry->max_free = entry->left->max_free;
  823         if (entry->right != NULL && entry->right->max_free > entry->max_free)
  824                 entry->max_free = entry->right->max_free;
  825 }
  826 
  827 /*
  828  *      vm_map_entry_splay:
  829  *
  830  *      The Sleator and Tarjan top-down splay algorithm with the
  831  *      following variation.  Max_free must be computed bottom-up, so
  832  *      on the downward pass, maintain the left and right spines in
  833  *      reverse order.  Then, make a second pass up each side to fix
  834  *      the pointers and compute max_free.  The time bound is O(log n)
  835  *      amortized.
  836  *
  837  *      The new root is the vm_map_entry containing "addr", or else an
  838  *      adjacent entry (lower or higher) if addr is not in the tree.
  839  *
  840  *      The map must be locked, and leaves it so.
  841  *
  842  *      Returns: the new root.
  843  */
  844 static vm_map_entry_t
  845 vm_map_entry_splay(vm_offset_t addr, vm_map_entry_t root)
  846 {
  847         vm_map_entry_t llist, rlist;
  848         vm_map_entry_t ltree, rtree;
  849         vm_map_entry_t y;
  850 
  851         /* Special case of empty tree. */
  852         if (root == NULL)
  853                 return (root);
  854 
  855         /*
  856          * Pass One: Splay down the tree until we find addr or a NULL
  857          * pointer where addr would go.  llist and rlist are the two
  858          * sides in reverse order (bottom-up), with llist linked by
  859          * the right pointer and rlist linked by the left pointer in
  860          * the vm_map_entry.  Wait until Pass Two to set max_free on
  861          * the two spines.
  862          */
  863         llist = NULL;
  864         rlist = NULL;
  865         for (;;) {
  866                 /* root is never NULL in here. */
  867                 if (addr < root->start) {
  868                         y = root->left;
  869                         if (y == NULL)
  870                                 break;
  871                         if (addr < y->start && y->left != NULL) {
  872                                 /* Rotate right and put y on rlist. */
  873                                 root->left = y->right;
  874                                 y->right = root;
  875                                 vm_map_entry_set_max_free(root);
  876                                 root = y->left;
  877                                 y->left = rlist;
  878                                 rlist = y;
  879                         } else {
  880                                 /* Put root on rlist. */
  881                                 root->left = rlist;
  882                                 rlist = root;
  883                                 root = y;
  884                         }
  885                 } else if (addr >= root->end) {
  886                         y = root->right;
  887                         if (y == NULL)
  888                                 break;
  889                         if (addr >= y->end && y->right != NULL) {
  890                                 /* Rotate left and put y on llist. */
  891                                 root->right = y->left;
  892                                 y->left = root;
  893                                 vm_map_entry_set_max_free(root);
  894                                 root = y->right;
  895                                 y->right = llist;
  896                                 llist = y;
  897                         } else {
  898                                 /* Put root on llist. */
  899                                 root->right = llist;
  900                                 llist = root;
  901                                 root = y;
  902                         }
  903                 } else
  904                         break;
  905         }
  906 
  907         /*
  908          * Pass Two: Walk back up the two spines, flip the pointers
  909          * and set max_free.  The subtrees of the root go at the
  910          * bottom of llist and rlist.
  911          */
  912         ltree = root->left;
  913         while (llist != NULL) {
  914                 y = llist->right;
  915                 llist->right = ltree;
  916                 vm_map_entry_set_max_free(llist);
  917                 ltree = llist;
  918                 llist = y;
  919         }
  920         rtree = root->right;
  921         while (rlist != NULL) {
  922                 y = rlist->left;
  923                 rlist->left = rtree;
  924                 vm_map_entry_set_max_free(rlist);
  925                 rtree = rlist;
  926                 rlist = y;
  927         }
  928 
  929         /*
  930          * Final assembly: add ltree and rtree as subtrees of root.
  931          */
  932         root->left = ltree;
  933         root->right = rtree;
  934         vm_map_entry_set_max_free(root);
  935 
  936         return (root);
  937 }
  938 
  939 /*
  940  *      vm_map_entry_{un,}link:
  941  *
  942  *      Insert/remove entries from maps.
  943  */
  944 static void
  945 vm_map_entry_link(vm_map_t map,
  946                   vm_map_entry_t after_where,
  947                   vm_map_entry_t entry)
  948 {
  949 
  950         CTR4(KTR_VM,
  951             "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map,
  952             map->nentries, entry, after_where);
  953         VM_MAP_ASSERT_LOCKED(map);
  954         KASSERT(after_where == &map->header ||
  955             after_where->end <= entry->start,
  956             ("vm_map_entry_link: prev end %jx new start %jx overlap",
  957             (uintmax_t)after_where->end, (uintmax_t)entry->start));
  958         KASSERT(after_where->next == &map->header ||
  959             entry->end <= after_where->next->start,
  960             ("vm_map_entry_link: new end %jx next start %jx overlap",
  961             (uintmax_t)entry->end, (uintmax_t)after_where->next->start));
  962 
  963         map->nentries++;
  964         entry->prev = after_where;
  965         entry->next = after_where->next;
  966         entry->next->prev = entry;
  967         after_where->next = entry;
  968 
  969         if (after_where != &map->header) {
  970                 if (after_where != map->root)
  971                         vm_map_entry_splay(after_where->start, map->root);
  972                 entry->right = after_where->right;
  973                 entry->left = after_where;
  974                 after_where->right = NULL;
  975                 after_where->adj_free = entry->start - after_where->end;
  976                 vm_map_entry_set_max_free(after_where);
  977         } else {
  978                 entry->right = map->root;
  979                 entry->left = NULL;
  980         }
  981         entry->adj_free = (entry->next == &map->header ? map->max_offset :
  982             entry->next->start) - entry->end;
  983         vm_map_entry_set_max_free(entry);
  984         map->root = entry;
  985 }
  986 
  987 static void
  988 vm_map_entry_unlink(vm_map_t map,
  989                     vm_map_entry_t entry)
  990 {
  991         vm_map_entry_t next, prev, root;
  992 
  993         VM_MAP_ASSERT_LOCKED(map);
  994         if (entry != map->root)
  995                 vm_map_entry_splay(entry->start, map->root);
  996         if (entry->left == NULL)
  997                 root = entry->right;
  998         else {
  999                 root = vm_map_entry_splay(entry->start, entry->left);
 1000                 root->right = entry->right;
 1001                 root->adj_free = (entry->next == &map->header ? map->max_offset :
 1002                     entry->next->start) - root->end;
 1003                 vm_map_entry_set_max_free(root);
 1004         }
 1005         map->root = root;
 1006 
 1007         prev = entry->prev;
 1008         next = entry->next;
 1009         next->prev = prev;
 1010         prev->next = next;
 1011         map->nentries--;
 1012         CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map,
 1013             map->nentries, entry);
 1014 }
 1015 
 1016 /*
 1017  *      vm_map_entry_resize_free:
 1018  *
 1019  *      Recompute the amount of free space following a vm_map_entry
 1020  *      and propagate that value up the tree.  Call this function after
 1021  *      resizing a map entry in-place, that is, without a call to
 1022  *      vm_map_entry_link() or _unlink().
 1023  *
 1024  *      The map must be locked, and leaves it so.
 1025  */
 1026 static void
 1027 vm_map_entry_resize_free(vm_map_t map, vm_map_entry_t entry)
 1028 {
 1029 
 1030         /*
 1031          * Using splay trees without parent pointers, propagating
 1032          * max_free up the tree is done by moving the entry to the
 1033          * root and making the change there.
 1034          */
 1035         if (entry != map->root)
 1036                 map->root = vm_map_entry_splay(entry->start, map->root);
 1037 
 1038         entry->adj_free = (entry->next == &map->header ? map->max_offset :
 1039             entry->next->start) - entry->end;
 1040         vm_map_entry_set_max_free(entry);
 1041 }
 1042 
 1043 /*
 1044  *      vm_map_lookup_entry:    [ internal use only ]
 1045  *
 1046  *      Finds the map entry containing (or
 1047  *      immediately preceding) the specified address
 1048  *      in the given map; the entry is returned
 1049  *      in the "entry" parameter.  The boolean
 1050  *      result indicates whether the address is
 1051  *      actually contained in the map.
 1052  */
 1053 boolean_t
 1054 vm_map_lookup_entry(
 1055         vm_map_t map,
 1056         vm_offset_t address,
 1057         vm_map_entry_t *entry)  /* OUT */
 1058 {
 1059         vm_map_entry_t cur;
 1060         boolean_t locked;
 1061 
 1062         /*
 1063          * If the map is empty, then the map entry immediately preceding
 1064          * "address" is the map's header.
 1065          */
 1066         cur = map->root;
 1067         if (cur == NULL)
 1068                 *entry = &map->header;
 1069         else if (address >= cur->start && cur->end > address) {
 1070                 *entry = cur;
 1071                 return (TRUE);
 1072         } else if ((locked = vm_map_locked(map)) ||
 1073             sx_try_upgrade(&map->lock)) {
 1074                 /*
 1075                  * Splay requires a write lock on the map.  However, it only
 1076                  * restructures the binary search tree; it does not otherwise
 1077                  * change the map.  Thus, the map's timestamp need not change
 1078                  * on a temporary upgrade.
 1079                  */
 1080                 map->root = cur = vm_map_entry_splay(address, cur);
 1081                 if (!locked)
 1082                         sx_downgrade(&map->lock);
 1083 
 1084                 /*
 1085                  * If "address" is contained within a map entry, the new root
 1086                  * is that map entry.  Otherwise, the new root is a map entry
 1087                  * immediately before or after "address".
 1088                  */
 1089                 if (address >= cur->start) {
 1090                         *entry = cur;
 1091                         if (cur->end > address)
 1092                                 return (TRUE);
 1093                 } else
 1094                         *entry = cur->prev;
 1095         } else
 1096                 /*
 1097                  * Since the map is only locked for read access, perform a
 1098                  * standard binary search tree lookup for "address".
 1099                  */
 1100                 for (;;) {
 1101                         if (address < cur->start) {
 1102                                 if (cur->left == NULL) {
 1103                                         *entry = cur->prev;
 1104                                         break;
 1105                                 }
 1106                                 cur = cur->left;
 1107                         } else if (cur->end > address) {
 1108                                 *entry = cur;
 1109                                 return (TRUE);
 1110                         } else {
 1111                                 if (cur->right == NULL) {
 1112                                         *entry = cur;
 1113                                         break;
 1114                                 }
 1115                                 cur = cur->right;
 1116                         }
 1117                 }
 1118         return (FALSE);
 1119 }
 1120 
 1121 /*
 1122  *      vm_map_insert:
 1123  *
 1124  *      Inserts the given whole VM object into the target
 1125  *      map at the specified address range.  The object's
 1126  *      size should match that of the address range.
 1127  *
 1128  *      Requires that the map be locked, and leaves it so.
 1129  *
 1130  *      If object is non-NULL, ref count must be bumped by caller
 1131  *      prior to making call to account for the new entry.
 1132  */
 1133 int
 1134 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
 1135     vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, int cow)
 1136 {
 1137         vm_map_entry_t new_entry, prev_entry, temp_entry;
 1138         struct ucred *cred;
 1139         vm_eflags_t protoeflags;
 1140         vm_inherit_t inheritance;
 1141 
 1142         VM_MAP_ASSERT_LOCKED(map);
 1143         KASSERT((object != kmem_object && object != kernel_object) ||
 1144             (cow & MAP_COPY_ON_WRITE) == 0,
 1145             ("vm_map_insert: kmem or kernel object and COW"));
 1146         KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0,
 1147             ("vm_map_insert: paradoxical MAP_NOFAULT request"));
 1148         KASSERT((prot & ~max) == 0,
 1149             ("prot %#x is not subset of max_prot %#x", prot, max));
 1150 
 1151         /*
 1152          * Check that the start and end points are not bogus.
 1153          */
 1154         if (start < map->min_offset || end > map->max_offset || start >= end)
 1155                 return (KERN_INVALID_ADDRESS);
 1156 
 1157         /*
 1158          * Find the entry prior to the proposed starting address; if it's part
 1159          * of an existing entry, this range is bogus.
 1160          */
 1161         if (vm_map_lookup_entry(map, start, &temp_entry))
 1162                 return (KERN_NO_SPACE);
 1163 
 1164         prev_entry = temp_entry;
 1165 
 1166         /*
 1167          * Assert that the next entry doesn't overlap the end point.
 1168          */
 1169         if (prev_entry->next != &map->header && prev_entry->next->start < end)
 1170                 return (KERN_NO_SPACE);
 1171 
 1172         if ((cow & MAP_CREATE_GUARD) != 0 && (object != NULL ||
 1173             max != VM_PROT_NONE))
 1174                 return (KERN_INVALID_ARGUMENT);
 1175 
 1176         protoeflags = 0;
 1177         if (cow & MAP_COPY_ON_WRITE)
 1178                 protoeflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY;
 1179         if (cow & MAP_NOFAULT)
 1180                 protoeflags |= MAP_ENTRY_NOFAULT;
 1181         if (cow & MAP_DISABLE_SYNCER)
 1182                 protoeflags |= MAP_ENTRY_NOSYNC;
 1183         if (cow & MAP_DISABLE_COREDUMP)
 1184                 protoeflags |= MAP_ENTRY_NOCOREDUMP;
 1185         if (cow & MAP_STACK_GROWS_DOWN)
 1186                 protoeflags |= MAP_ENTRY_GROWS_DOWN;
 1187         if (cow & MAP_STACK_GROWS_UP)
 1188                 protoeflags |= MAP_ENTRY_GROWS_UP;
 1189         if (cow & MAP_VN_WRITECOUNT)
 1190                 protoeflags |= MAP_ENTRY_VN_WRITECNT;
 1191         if ((cow & MAP_CREATE_GUARD) != 0)
 1192                 protoeflags |= MAP_ENTRY_GUARD;
 1193         if ((cow & MAP_CREATE_STACK_GAP_DN) != 0)
 1194                 protoeflags |= MAP_ENTRY_STACK_GAP_DN;
 1195         if ((cow & MAP_CREATE_STACK_GAP_UP) != 0)
 1196                 protoeflags |= MAP_ENTRY_STACK_GAP_UP;
 1197         if (cow & MAP_INHERIT_SHARE)
 1198                 inheritance = VM_INHERIT_SHARE;
 1199         else
 1200                 inheritance = VM_INHERIT_DEFAULT;
 1201 
 1202         cred = NULL;
 1203         if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0)
 1204                 goto charged;
 1205         if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) &&
 1206             ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) {
 1207                 if (!(cow & MAP_ACC_CHARGED) && !swap_reserve(end - start))
 1208                         return (KERN_RESOURCE_SHORTAGE);
 1209                 KASSERT(object == NULL ||
 1210                     (protoeflags & MAP_ENTRY_NEEDS_COPY) != 0 ||
 1211                     object->cred == NULL,
 1212                     ("overcommit: vm_map_insert o %p", object));
 1213                 cred = curthread->td_ucred;
 1214         }
 1215 
 1216 charged:
 1217         /* Expand the kernel pmap, if necessary. */
 1218         if (map == kernel_map && end > kernel_vm_end)
 1219                 pmap_growkernel(end);
 1220         if (object != NULL) {
 1221                 /*
 1222                  * OBJ_ONEMAPPING must be cleared unless this mapping
 1223                  * is trivially proven to be the only mapping for any
 1224                  * of the object's pages.  (Object granularity
 1225                  * reference counting is insufficient to recognize
 1226                  * aliases with precision.)
 1227                  */
 1228                 VM_OBJECT_WLOCK(object);
 1229                 if (object->ref_count > 1 || object->shadow_count != 0)
 1230                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
 1231                 VM_OBJECT_WUNLOCK(object);
 1232         } else if (prev_entry != &map->header &&
 1233             prev_entry->eflags == protoeflags &&
 1234             (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 &&
 1235             prev_entry->end == start && prev_entry->wired_count == 0 &&
 1236             (prev_entry->cred == cred ||
 1237             (prev_entry->object.vm_object != NULL &&
 1238             prev_entry->object.vm_object->cred == cred)) &&
 1239             vm_object_coalesce(prev_entry->object.vm_object,
 1240             prev_entry->offset,
 1241             (vm_size_t)(prev_entry->end - prev_entry->start),
 1242             (vm_size_t)(end - prev_entry->end), cred != NULL &&
 1243             (protoeflags & MAP_ENTRY_NEEDS_COPY) == 0)) {
 1244                 /*
 1245                  * We were able to extend the object.  Determine if we
 1246                  * can extend the previous map entry to include the
 1247                  * new range as well.
 1248                  */
 1249                 if (prev_entry->inheritance == inheritance &&
 1250                     prev_entry->protection == prot &&
 1251                     prev_entry->max_protection == max) {
 1252                         if ((prev_entry->eflags & MAP_ENTRY_GUARD) == 0)
 1253                                 map->size += end - prev_entry->end;
 1254                         prev_entry->end = end;
 1255                         vm_map_entry_resize_free(map, prev_entry);
 1256                         vm_map_simplify_entry(map, prev_entry);
 1257                         return (KERN_SUCCESS);
 1258                 }
 1259 
 1260                 /*
 1261                  * If we can extend the object but cannot extend the
 1262                  * map entry, we have to create a new map entry.  We
 1263                  * must bump the ref count on the extended object to
 1264                  * account for it.  object may be NULL.
 1265                  */
 1266                 object = prev_entry->object.vm_object;
 1267                 offset = prev_entry->offset +
 1268                     (prev_entry->end - prev_entry->start);
 1269                 vm_object_reference(object);
 1270                 if (cred != NULL && object != NULL && object->cred != NULL &&
 1271                     !(prev_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
 1272                         /* Object already accounts for this uid. */
 1273                         cred = NULL;
 1274                 }
 1275         }
 1276         if (cred != NULL)
 1277                 crhold(cred);
 1278 
 1279         /*
 1280          * Create a new entry
 1281          */
 1282         new_entry = vm_map_entry_create(map);
 1283         new_entry->start = start;
 1284         new_entry->end = end;
 1285         new_entry->cred = NULL;
 1286 
 1287         new_entry->eflags = protoeflags;
 1288         new_entry->object.vm_object = object;
 1289         new_entry->offset = offset;
 1290 
 1291         new_entry->inheritance = inheritance;
 1292         new_entry->protection = prot;
 1293         new_entry->max_protection = max;
 1294         new_entry->wired_count = 0;
 1295         new_entry->wiring_thread = NULL;
 1296         new_entry->read_ahead = VM_FAULT_READ_AHEAD_INIT;
 1297         new_entry->next_read = OFF_TO_IDX(offset);
 1298 
 1299         KASSERT(cred == NULL || !ENTRY_CHARGED(new_entry),
 1300             ("overcommit: vm_map_insert leaks vm_map %p", new_entry));
 1301         new_entry->cred = cred;
 1302 
 1303         /*
 1304          * Insert the new entry into the list
 1305          */
 1306         vm_map_entry_link(map, prev_entry, new_entry);
 1307         if ((new_entry->eflags & MAP_ENTRY_GUARD) == 0)
 1308                 map->size += new_entry->end - new_entry->start;
 1309 
 1310         /*
 1311          * Try to coalesce the new entry with both the previous and next
 1312          * entries in the list.  Previously, we only attempted to coalesce
 1313          * with the previous entry when object is NULL.  Here, we handle the
 1314          * other cases, which are less common.
 1315          */
 1316         vm_map_simplify_entry(map, new_entry);
 1317 
 1318         if ((cow & (MAP_PREFAULT | MAP_PREFAULT_PARTIAL)) != 0) {
 1319                 vm_map_pmap_enter(map, start, prot, object, OFF_TO_IDX(offset),
 1320                     end - start, cow & MAP_PREFAULT_PARTIAL);
 1321         }
 1322 
 1323         return (KERN_SUCCESS);
 1324 }
 1325 
 1326 /*
 1327  *      vm_map_findspace:
 1328  *
 1329  *      Find the first fit (lowest VM address) for "length" free bytes
 1330  *      beginning at address >= start in the given map.
 1331  *
 1332  *      In a vm_map_entry, "adj_free" is the amount of free space
 1333  *      adjacent (higher address) to this entry, and "max_free" is the
 1334  *      maximum amount of contiguous free space in its subtree.  This
 1335  *      allows finding a free region in one path down the tree, so
 1336  *      O(log n) amortized with splay trees.
 1337  *
 1338  *      The map must be locked, and leaves it so.
 1339  *
 1340  *      Returns: 0 on success, and starting address in *addr,
 1341  *               1 if insufficient space.
 1342  */
 1343 int
 1344 vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length,
 1345     vm_offset_t *addr)  /* OUT */
 1346 {
 1347         vm_map_entry_t entry;
 1348         vm_offset_t st;
 1349 
 1350         /*
 1351          * Request must fit within min/max VM address and must avoid
 1352          * address wrap.
 1353          */
 1354         if (start < map->min_offset)
 1355                 start = map->min_offset;
 1356         if (start + length > map->max_offset || start + length < start)
 1357                 return (1);
 1358 
 1359         /* Empty tree means wide open address space. */
 1360         if (map->root == NULL) {
 1361                 *addr = start;
 1362                 return (0);
 1363         }
 1364 
 1365         /*
 1366          * After splay, if start comes before root node, then there
 1367          * must be a gap from start to the root.
 1368          */
 1369         map->root = vm_map_entry_splay(start, map->root);
 1370         if (start + length <= map->root->start) {
 1371                 *addr = start;
 1372                 return (0);
 1373         }
 1374 
 1375         /*
 1376          * Root is the last node that might begin its gap before
 1377          * start, and this is the last comparison where address
 1378          * wrap might be a problem.
 1379          */
 1380         st = (start > map->root->end) ? start : map->root->end;
 1381         if (length <= map->root->end + map->root->adj_free - st) {
 1382                 *addr = st;
 1383                 return (0);
 1384         }
 1385 
 1386         /* With max_free, can immediately tell if no solution. */
 1387         entry = map->root->right;
 1388         if (entry == NULL || length > entry->max_free)
 1389                 return (1);
 1390 
 1391         /*
 1392          * Search the right subtree in the order: left subtree, root,
 1393          * right subtree (first fit).  The previous splay implies that
 1394          * all regions in the right subtree have addresses > start.
 1395          */
 1396         while (entry != NULL) {
 1397                 if (entry->left != NULL && entry->left->max_free >= length)
 1398                         entry = entry->left;
 1399                 else if (entry->adj_free >= length) {
 1400                         *addr = entry->end;
 1401                         return (0);
 1402                 } else
 1403                         entry = entry->right;
 1404         }
 1405 
 1406         /* Can't get here, so panic if we do. */
 1407         panic("vm_map_findspace: max_free corrupt");
 1408 }
 1409 
 1410 int
 1411 vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
 1412     vm_offset_t start, vm_size_t length, vm_prot_t prot,
 1413     vm_prot_t max, int cow)
 1414 {
 1415         vm_offset_t end;
 1416         int result;
 1417 
 1418         end = start + length;
 1419         KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 ||
 1420             object == NULL,
 1421             ("vm_map_fixed: non-NULL backing object for stack"));
 1422         vm_map_lock(map);
 1423         VM_MAP_RANGE_CHECK(map, start, end);
 1424         if ((cow & MAP_CHECK_EXCL) == 0)
 1425                 vm_map_delete(map, start, end);
 1426         if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) {
 1427                 result = vm_map_stack_locked(map, start, length, sgrowsiz,
 1428                     prot, max, cow);
 1429         } else {
 1430                 result = vm_map_insert(map, object, offset, start, end,
 1431                     prot, max, cow);
 1432         }
 1433         vm_map_unlock(map);
 1434         return (result);
 1435 }
 1436 
 1437 /*
 1438  *      vm_map_find finds an unallocated region in the target address
 1439  *      map with the given length.  The search is defined to be
 1440  *      first-fit from the specified address; the region found is
 1441  *      returned in the same parameter.
 1442  *
 1443  *      If object is non-NULL, ref count must be bumped by caller
 1444  *      prior to making call to account for the new entry.
 1445  */
 1446 int
 1447 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
 1448             vm_offset_t *addr,  /* IN/OUT */
 1449             vm_size_t length, vm_offset_t max_addr, int find_space,
 1450             vm_prot_t prot, vm_prot_t max, int cow)
 1451 {
 1452         vm_offset_t alignment, initial_addr, start;
 1453         int result;
 1454 
 1455         KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 ||
 1456             object == NULL,
 1457             ("vm_map_find: non-NULL backing object for stack"));
 1458         if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL ||
 1459             (object->flags & OBJ_COLORED) == 0))
 1460                 find_space = VMFS_ANY_SPACE;
 1461         if (find_space >> 8 != 0) {
 1462                 KASSERT((find_space & 0xff) == 0, ("bad VMFS flags"));
 1463                 alignment = (vm_offset_t)1 << (find_space >> 8);
 1464         } else
 1465                 alignment = 0;
 1466         initial_addr = *addr;
 1467 again:
 1468         start = initial_addr;
 1469         vm_map_lock(map);
 1470         do {
 1471                 if (find_space != VMFS_NO_SPACE) {
 1472                         if (vm_map_findspace(map, start, length, addr) ||
 1473                             (max_addr != 0 && *addr + length > max_addr)) {
 1474                                 vm_map_unlock(map);
 1475                                 if (find_space == VMFS_OPTIMAL_SPACE) {
 1476                                         find_space = VMFS_ANY_SPACE;
 1477                                         goto again;
 1478                                 }
 1479                                 return (KERN_NO_SPACE);
 1480                         }
 1481                         switch (find_space) {
 1482                         case VMFS_SUPER_SPACE:
 1483                         case VMFS_OPTIMAL_SPACE:
 1484                                 pmap_align_superpage(object, offset, addr,
 1485                                     length);
 1486                                 break;
 1487                         case VMFS_ANY_SPACE:
 1488                                 break;
 1489                         default:
 1490                                 if ((*addr & (alignment - 1)) != 0) {
 1491                                         *addr &= ~(alignment - 1);
 1492                                         *addr += alignment;
 1493                                 }
 1494                                 break;
 1495                         }
 1496 
 1497                         start = *addr;
 1498                 }
 1499                 if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) {
 1500                         result = vm_map_stack_locked(map, start, length,
 1501                             sgrowsiz, prot, max, cow);
 1502                 } else {
 1503                         result = vm_map_insert(map, object, offset, start,
 1504                             start + length, prot, max, cow);
 1505                 }
 1506         } while (result == KERN_NO_SPACE && find_space != VMFS_NO_SPACE &&
 1507             find_space != VMFS_ANY_SPACE);
 1508         vm_map_unlock(map);
 1509         return (result);
 1510 }
 1511 
 1512 /*
 1513  *      vm_map_find_min() is a variant of vm_map_find() that takes an
 1514  *      additional parameter (min_addr) and treats the given address
 1515  *      (*addr) differently.  Specifically, it treats *addr as a hint
 1516  *      and not as the minimum address where the mapping is created.
 1517  *
 1518  *      This function works in two phases.  First, it tries to
 1519  *      allocate above the hint.  If that fails and the hint is
 1520  *      greater than min_addr, it performs a second pass, replacing
 1521  *      the hint with min_addr as the minimum address for the
 1522  *      allocation.
 1523  */
 1524 int
 1525 vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
 1526     vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr,
 1527     vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max,
 1528     int cow)
 1529 {
 1530         vm_offset_t hint;
 1531         int rv;
 1532 
 1533         hint = *addr;
 1534         for (;;) {
 1535                 rv = vm_map_find(map, object, offset, addr, length, max_addr,
 1536                     find_space, prot, max, cow);
 1537                 if (rv == KERN_SUCCESS || min_addr >= hint)
 1538                         return (rv);
 1539                 *addr = hint = min_addr;
 1540         }
 1541 }
 1542 
 1543 /*
 1544  *      vm_map_simplify_entry:
 1545  *
 1546  *      Simplify the given map entry by merging with either neighbor.  This
 1547  *      routine also has the ability to merge with both neighbors.
 1548  *
 1549  *      The map must be locked.
 1550  *
 1551  *      This routine guarentees that the passed entry remains valid (though
 1552  *      possibly extended).  When merging, this routine may delete one or
 1553  *      both neighbors.
 1554  */
 1555 void
 1556 vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry)
 1557 {
 1558         vm_map_entry_t next, prev;
 1559         vm_size_t prevsize, esize;
 1560 
 1561         if ((entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP |
 1562             MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP)) != 0)
 1563                 return;
 1564 
 1565         prev = entry->prev;
 1566         if (prev != &map->header) {
 1567                 prevsize = prev->end - prev->start;
 1568                 if ( (prev->end == entry->start) &&
 1569                      (prev->object.vm_object == entry->object.vm_object) &&
 1570                      (!prev->object.vm_object ||
 1571                         (prev->offset + prevsize == entry->offset)) &&
 1572                      (prev->eflags == entry->eflags) &&
 1573                      (prev->protection == entry->protection) &&
 1574                      (prev->max_protection == entry->max_protection) &&
 1575                      (prev->inheritance == entry->inheritance) &&
 1576                      (prev->wired_count == entry->wired_count) &&
 1577                      (prev->cred == entry->cred)) {
 1578                         vm_map_entry_unlink(map, prev);
 1579                         entry->start = prev->start;
 1580                         entry->offset = prev->offset;
 1581                         if (entry->prev != &map->header)
 1582                                 vm_map_entry_resize_free(map, entry->prev);
 1583 
 1584                         /*
 1585                          * If the backing object is a vnode object,
 1586                          * vm_object_deallocate() calls vrele().
 1587                          * However, vrele() does not lock the vnode
 1588                          * because the vnode has additional
 1589                          * references.  Thus, the map lock can be kept
 1590                          * without causing a lock-order reversal with
 1591                          * the vnode lock.
 1592                          *
 1593                          * Since we count the number of virtual page
 1594                          * mappings in object->un_pager.vnp.writemappings,
 1595                          * the writemappings value should not be adjusted
 1596                          * when the entry is disposed of.
 1597                          */
 1598                         if (prev->object.vm_object)
 1599                                 vm_object_deallocate(prev->object.vm_object);
 1600                         if (prev->cred != NULL)
 1601                                 crfree(prev->cred);
 1602                         vm_map_entry_dispose(map, prev);
 1603                 }
 1604         }
 1605 
 1606         next = entry->next;
 1607         if (next != &map->header) {
 1608                 esize = entry->end - entry->start;
 1609                 if ((entry->end == next->start) &&
 1610                     (next->object.vm_object == entry->object.vm_object) &&
 1611                      (!entry->object.vm_object ||
 1612                         (entry->offset + esize == next->offset)) &&
 1613                     (next->eflags == entry->eflags) &&
 1614                     (next->protection == entry->protection) &&
 1615                     (next->max_protection == entry->max_protection) &&
 1616                     (next->inheritance == entry->inheritance) &&
 1617                     (next->wired_count == entry->wired_count) &&
 1618                     (next->cred == entry->cred)) {
 1619                         vm_map_entry_unlink(map, next);
 1620                         entry->end = next->end;
 1621                         vm_map_entry_resize_free(map, entry);
 1622 
 1623                         /*
 1624                          * See comment above.
 1625                          */
 1626                         if (next->object.vm_object)
 1627                                 vm_object_deallocate(next->object.vm_object);
 1628                         if (next->cred != NULL)
 1629                                 crfree(next->cred);
 1630                         vm_map_entry_dispose(map, next);
 1631                 }
 1632         }
 1633 }
 1634 /*
 1635  *      vm_map_clip_start:      [ internal use only ]
 1636  *
 1637  *      Asserts that the given entry begins at or after
 1638  *      the specified address; if necessary,
 1639  *      it splits the entry into two.
 1640  */
 1641 #define vm_map_clip_start(map, entry, startaddr) \
 1642 { \
 1643         if (startaddr > entry->start) \
 1644                 _vm_map_clip_start(map, entry, startaddr); \
 1645 }
 1646 
 1647 /*
 1648  *      This routine is called only when it is known that
 1649  *      the entry must be split.
 1650  */
 1651 static void
 1652 _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
 1653 {
 1654         vm_map_entry_t new_entry;
 1655 
 1656         VM_MAP_ASSERT_LOCKED(map);
 1657 
 1658         /*
 1659          * Split off the front portion -- note that we must insert the new
 1660          * entry BEFORE this one, so that this entry has the specified
 1661          * starting address.
 1662          */
 1663         vm_map_simplify_entry(map, entry);
 1664 
 1665         /*
 1666          * If there is no object backing this entry, we might as well create
 1667          * one now.  If we defer it, an object can get created after the map
 1668          * is clipped, and individual objects will be created for the split-up
 1669          * map.  This is a bit of a hack, but is also about the best place to
 1670          * put this improvement.
 1671          */
 1672         if (entry->object.vm_object == NULL && !map->system_map &&
 1673             (entry->eflags & MAP_ENTRY_GUARD) == 0) {
 1674                 vm_object_t object;
 1675                 object = vm_object_allocate(OBJT_DEFAULT,
 1676                                 atop(entry->end - entry->start));
 1677                 entry->object.vm_object = object;
 1678                 entry->offset = 0;
 1679                 if (entry->cred != NULL) {
 1680                         object->cred = entry->cred;
 1681                         object->charge = entry->end - entry->start;
 1682                         entry->cred = NULL;
 1683                 }
 1684         } else if (entry->object.vm_object != NULL &&
 1685                    ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
 1686                    entry->cred != NULL) {
 1687                 VM_OBJECT_WLOCK(entry->object.vm_object);
 1688                 KASSERT(entry->object.vm_object->cred == NULL,
 1689                     ("OVERCOMMIT: vm_entry_clip_start: both cred e %p", entry));
 1690                 entry->object.vm_object->cred = entry->cred;
 1691                 entry->object.vm_object->charge = entry->end - entry->start;
 1692                 VM_OBJECT_WUNLOCK(entry->object.vm_object);
 1693                 entry->cred = NULL;
 1694         }
 1695 
 1696         new_entry = vm_map_entry_create(map);
 1697         *new_entry = *entry;
 1698 
 1699         new_entry->end = start;
 1700         entry->offset += (start - entry->start);
 1701         entry->start = start;
 1702         if (new_entry->cred != NULL)
 1703                 crhold(entry->cred);
 1704 
 1705         vm_map_entry_link(map, entry->prev, new_entry);
 1706 
 1707         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 1708                 vm_object_reference(new_entry->object.vm_object);
 1709                 /*
 1710                  * The object->un_pager.vnp.writemappings for the
 1711                  * object of MAP_ENTRY_VN_WRITECNT type entry shall be
 1712                  * kept as is here.  The virtual pages are
 1713                  * re-distributed among the clipped entries, so the sum is
 1714                  * left the same.
 1715                  */
 1716         }
 1717 }
 1718 
 1719 /*
 1720  *      vm_map_clip_end:        [ internal use only ]
 1721  *
 1722  *      Asserts that the given entry ends at or before
 1723  *      the specified address; if necessary,
 1724  *      it splits the entry into two.
 1725  */
 1726 #define vm_map_clip_end(map, entry, endaddr) \
 1727 { \
 1728         if ((endaddr) < (entry->end)) \
 1729                 _vm_map_clip_end((map), (entry), (endaddr)); \
 1730 }
 1731 
 1732 /*
 1733  *      This routine is called only when it is known that
 1734  *      the entry must be split.
 1735  */
 1736 static void
 1737 _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
 1738 {
 1739         vm_map_entry_t new_entry;
 1740 
 1741         VM_MAP_ASSERT_LOCKED(map);
 1742 
 1743         /*
 1744          * If there is no object backing this entry, we might as well create
 1745          * one now.  If we defer it, an object can get created after the map
 1746          * is clipped, and individual objects will be created for the split-up
 1747          * map.  This is a bit of a hack, but is also about the best place to
 1748          * put this improvement.
 1749          */
 1750         if (entry->object.vm_object == NULL && !map->system_map &&
 1751             (entry->eflags & MAP_ENTRY_GUARD) == 0) {
 1752                 vm_object_t object;
 1753                 object = vm_object_allocate(OBJT_DEFAULT,
 1754                                 atop(entry->end - entry->start));
 1755                 entry->object.vm_object = object;
 1756                 entry->offset = 0;
 1757                 if (entry->cred != NULL) {
 1758                         object->cred = entry->cred;
 1759                         object->charge = entry->end - entry->start;
 1760                         entry->cred = NULL;
 1761                 }
 1762         } else if (entry->object.vm_object != NULL &&
 1763                    ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) &&
 1764                    entry->cred != NULL) {
 1765                 VM_OBJECT_WLOCK(entry->object.vm_object);
 1766                 KASSERT(entry->object.vm_object->cred == NULL,
 1767                     ("OVERCOMMIT: vm_entry_clip_end: both cred e %p", entry));
 1768                 entry->object.vm_object->cred = entry->cred;
 1769                 entry->object.vm_object->charge = entry->end - entry->start;
 1770                 VM_OBJECT_WUNLOCK(entry->object.vm_object);
 1771                 entry->cred = NULL;
 1772         }
 1773 
 1774         /*
 1775          * Create a new entry and insert it AFTER the specified entry
 1776          */
 1777         new_entry = vm_map_entry_create(map);
 1778         *new_entry = *entry;
 1779 
 1780         new_entry->start = entry->end = end;
 1781         new_entry->offset += (end - entry->start);
 1782         if (new_entry->cred != NULL)
 1783                 crhold(entry->cred);
 1784 
 1785         vm_map_entry_link(map, entry, new_entry);
 1786 
 1787         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 1788                 vm_object_reference(new_entry->object.vm_object);
 1789         }
 1790 }
 1791 
 1792 /*
 1793  *      vm_map_submap:          [ kernel use only ]
 1794  *
 1795  *      Mark the given range as handled by a subordinate map.
 1796  *
 1797  *      This range must have been created with vm_map_find,
 1798  *      and no other operations may have been performed on this
 1799  *      range prior to calling vm_map_submap.
 1800  *
 1801  *      Only a limited number of operations can be performed
 1802  *      within this rage after calling vm_map_submap:
 1803  *              vm_fault
 1804  *      [Don't try vm_map_copy!]
 1805  *
 1806  *      To remove a submapping, one must first remove the
 1807  *      range from the superior map, and then destroy the
 1808  *      submap (if desired).  [Better yet, don't try it.]
 1809  */
 1810 int
 1811 vm_map_submap(
 1812         vm_map_t map,
 1813         vm_offset_t start,
 1814         vm_offset_t end,
 1815         vm_map_t submap)
 1816 {
 1817         vm_map_entry_t entry;
 1818         int result = KERN_INVALID_ARGUMENT;
 1819 
 1820         vm_map_lock(map);
 1821 
 1822         VM_MAP_RANGE_CHECK(map, start, end);
 1823 
 1824         if (vm_map_lookup_entry(map, start, &entry)) {
 1825                 vm_map_clip_start(map, entry, start);
 1826         } else
 1827                 entry = entry->next;
 1828 
 1829         vm_map_clip_end(map, entry, end);
 1830 
 1831         if ((entry->start == start) && (entry->end == end) &&
 1832             ((entry->eflags & MAP_ENTRY_COW) == 0) &&
 1833             (entry->object.vm_object == NULL)) {
 1834                 entry->object.sub_map = submap;
 1835                 entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
 1836                 result = KERN_SUCCESS;
 1837         }
 1838         vm_map_unlock(map);
 1839 
 1840         return (result);
 1841 }
 1842 
 1843 /*
 1844  * The maximum number of pages to map if MAP_PREFAULT_PARTIAL is specified
 1845  */
 1846 #define MAX_INIT_PT     96
 1847 
 1848 /*
 1849  *      vm_map_pmap_enter:
 1850  *
 1851  *      Preload the specified map's pmap with mappings to the specified
 1852  *      object's memory-resident pages.  No further physical pages are
 1853  *      allocated, and no further virtual pages are retrieved from secondary
 1854  *      storage.  If the specified flags include MAP_PREFAULT_PARTIAL, then a
 1855  *      limited number of page mappings are created at the low-end of the
 1856  *      specified address range.  (For this purpose, a superpage mapping
 1857  *      counts as one page mapping.)  Otherwise, all resident pages within
 1858  *      the specified address range are mapped.  Because these mappings are
 1859  *      being created speculatively, cached pages are not reactivated and
 1860  *      mapped.
 1861  */
 1862 void
 1863 vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
 1864     vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags)
 1865 {
 1866         vm_offset_t start;
 1867         vm_page_t p, p_start;
 1868         vm_pindex_t mask, psize, threshold, tmpidx;
 1869 
 1870         if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL)
 1871                 return;
 1872         VM_OBJECT_RLOCK(object);
 1873         if (object->type == OBJT_DEVICE || object->type == OBJT_SG) {
 1874                 VM_OBJECT_RUNLOCK(object);
 1875                 VM_OBJECT_WLOCK(object);
 1876                 if (object->type == OBJT_DEVICE || object->type == OBJT_SG) {
 1877                         pmap_object_init_pt(map->pmap, addr, object, pindex,
 1878                             size);
 1879                         VM_OBJECT_WUNLOCK(object);
 1880                         return;
 1881                 }
 1882                 VM_OBJECT_LOCK_DOWNGRADE(object);
 1883         }
 1884 
 1885         psize = atop(size);
 1886         if (psize + pindex > object->size) {
 1887                 if (object->size < pindex) {
 1888                         VM_OBJECT_RUNLOCK(object);
 1889                         return;
 1890                 }
 1891                 psize = object->size - pindex;
 1892         }
 1893 
 1894         start = 0;
 1895         p_start = NULL;
 1896         threshold = MAX_INIT_PT;
 1897 
 1898         p = vm_page_find_least(object, pindex);
 1899         /*
 1900          * Assert: the variable p is either (1) the page with the
 1901          * least pindex greater than or equal to the parameter pindex
 1902          * or (2) NULL.
 1903          */
 1904         for (;
 1905              p != NULL && (tmpidx = p->pindex - pindex) < psize;
 1906              p = TAILQ_NEXT(p, listq)) {
 1907                 /*
 1908                  * don't allow an madvise to blow away our really
 1909                  * free pages allocating pv entries.
 1910                  */
 1911                 if (((flags & MAP_PREFAULT_MADVISE) != 0 &&
 1912                     cnt.v_free_count < cnt.v_free_reserved) ||
 1913                     ((flags & MAP_PREFAULT_PARTIAL) != 0 &&
 1914                     tmpidx >= threshold)) {
 1915                         psize = tmpidx;
 1916                         break;
 1917                 }
 1918                 if (p->valid == VM_PAGE_BITS_ALL) {
 1919                         if (p_start == NULL) {
 1920                                 start = addr + ptoa(tmpidx);
 1921                                 p_start = p;
 1922                         }
 1923                         /* Jump ahead if a superpage mapping is possible. */
 1924                         if (p->psind > 0 && ((addr + ptoa(tmpidx)) &
 1925                             (pagesizes[p->psind] - 1)) == 0) {
 1926                                 mask = atop(pagesizes[p->psind]) - 1;
 1927                                 if (tmpidx + mask < psize &&
 1928                                     vm_page_ps_is_valid(p)) {
 1929                                         p += mask;
 1930                                         threshold += mask;
 1931                                 }
 1932                         }
 1933                 } else if (p_start != NULL) {
 1934                         pmap_enter_object(map->pmap, start, addr +
 1935                             ptoa(tmpidx), p_start, prot);
 1936                         p_start = NULL;
 1937                 }
 1938         }
 1939         if (p_start != NULL)
 1940                 pmap_enter_object(map->pmap, start, addr + ptoa(psize),
 1941                     p_start, prot);
 1942         VM_OBJECT_RUNLOCK(object);
 1943 }
 1944 
 1945 /*
 1946  *      vm_map_protect:
 1947  *
 1948  *      Sets the protection of the specified address
 1949  *      region in the target map.  If "set_max" is
 1950  *      specified, the maximum protection is to be set;
 1951  *      otherwise, only the current protection is affected.
 1952  */
 1953 int
 1954 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
 1955                vm_prot_t new_prot, boolean_t set_max)
 1956 {
 1957         vm_map_entry_t current, entry;
 1958         vm_object_t obj;
 1959         struct ucred *cred;
 1960         vm_prot_t old_prot;
 1961 
 1962         if (start == end)
 1963                 return (KERN_SUCCESS);
 1964 
 1965         vm_map_lock(map);
 1966 
 1967         VM_MAP_RANGE_CHECK(map, start, end);
 1968 
 1969         if (vm_map_lookup_entry(map, start, &entry)) {
 1970                 vm_map_clip_start(map, entry, start);
 1971         } else {
 1972                 entry = entry->next;
 1973         }
 1974 
 1975         /*
 1976          * Make a first pass to check for protection violations.
 1977          */
 1978         for (current = entry; current != &map->header && current->start < end;
 1979             current = current->next) {
 1980                 if ((current->eflags & MAP_ENTRY_GUARD) != 0)
 1981                         continue;
 1982                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
 1983                         vm_map_unlock(map);
 1984                         return (KERN_INVALID_ARGUMENT);
 1985                 }
 1986                 if ((new_prot & current->max_protection) != new_prot) {
 1987                         vm_map_unlock(map);
 1988                         return (KERN_PROTECTION_FAILURE);
 1989                 }
 1990         }
 1991 
 1992         /*
 1993          * Do an accounting pass for private read-only mappings that
 1994          * now will do cow due to allowed write (e.g. debugger sets
 1995          * breakpoint on text segment)
 1996          */
 1997         for (current = entry; current != &map->header && current->start < end;
 1998             current = current->next) {
 1999 
 2000                 vm_map_clip_end(map, current, end);
 2001 
 2002                 if (set_max ||
 2003                     ((new_prot & ~(current->protection)) & VM_PROT_WRITE) == 0 ||
 2004                     ENTRY_CHARGED(current) ||
 2005                     (current->eflags & MAP_ENTRY_GUARD) != 0) {
 2006                         continue;
 2007                 }
 2008 
 2009                 cred = curthread->td_ucred;
 2010                 obj = current->object.vm_object;
 2011 
 2012                 if (obj == NULL || (current->eflags & MAP_ENTRY_NEEDS_COPY)) {
 2013                         if (!swap_reserve(current->end - current->start)) {
 2014                                 vm_map_unlock(map);
 2015                                 return (KERN_RESOURCE_SHORTAGE);
 2016                         }
 2017                         crhold(cred);
 2018                         current->cred = cred;
 2019                         continue;
 2020                 }
 2021 
 2022                 VM_OBJECT_WLOCK(obj);
 2023                 if (obj->type != OBJT_DEFAULT && obj->type != OBJT_SWAP) {
 2024                         VM_OBJECT_WUNLOCK(obj);
 2025                         continue;
 2026                 }
 2027 
 2028                 /*
 2029                  * Charge for the whole object allocation now, since
 2030                  * we cannot distinguish between non-charged and
 2031                  * charged clipped mapping of the same object later.
 2032                  */
 2033                 KASSERT(obj->charge == 0,
 2034                     ("vm_map_protect: object %p overcharged (entry %p)",
 2035                     obj, current));
 2036                 if (!swap_reserve(ptoa(obj->size))) {
 2037                         VM_OBJECT_WUNLOCK(obj);
 2038                         vm_map_unlock(map);
 2039                         return (KERN_RESOURCE_SHORTAGE);
 2040                 }
 2041 
 2042                 crhold(cred);
 2043                 obj->cred = cred;
 2044                 obj->charge = ptoa(obj->size);
 2045                 VM_OBJECT_WUNLOCK(obj);
 2046         }
 2047 
 2048         /*
 2049          * Go back and fix up protections. [Note that clipping is not
 2050          * necessary the second time.]
 2051          */
 2052         for (current = entry; current != &map->header && current->start < end;
 2053             current = current->next) {
 2054                 if ((current->eflags & MAP_ENTRY_GUARD) != 0)
 2055                         continue;
 2056 
 2057                 old_prot = current->protection;
 2058 
 2059                 if (set_max)
 2060                         current->protection =
 2061                             (current->max_protection = new_prot) &
 2062                             old_prot;
 2063                 else
 2064                         current->protection = new_prot;
 2065 
 2066                 /*
 2067                  * For user wired map entries, the normal lazy evaluation of
 2068                  * write access upgrades through soft page faults is
 2069                  * undesirable.  Instead, immediately copy any pages that are
 2070                  * copy-on-write and enable write access in the physical map.
 2071                  */
 2072                 if ((current->eflags & MAP_ENTRY_USER_WIRED) != 0 &&
 2073                     (current->protection & VM_PROT_WRITE) != 0 &&
 2074                     (old_prot & VM_PROT_WRITE) == 0)
 2075                         vm_fault_copy_entry(map, map, current, current, NULL);
 2076 
 2077                 /*
 2078                  * When restricting access, update the physical map.  Worry
 2079                  * about copy-on-write here.
 2080                  */
 2081                 if ((old_prot & ~current->protection) != 0) {
 2082 #define MASK(entry)     (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
 2083                                                         VM_PROT_ALL)
 2084                         pmap_protect(map->pmap, current->start,
 2085                             current->end,
 2086                             current->protection & MASK(current));
 2087 #undef  MASK
 2088                 }
 2089                 vm_map_simplify_entry(map, current);
 2090         }
 2091         vm_map_unlock(map);
 2092         return (KERN_SUCCESS);
 2093 }
 2094 
 2095 /*
 2096  *      vm_map_madvise:
 2097  *
 2098  *      This routine traverses a processes map handling the madvise
 2099  *      system call.  Advisories are classified as either those effecting
 2100  *      the vm_map_entry structure, or those effecting the underlying
 2101  *      objects.
 2102  */
 2103 int
 2104 vm_map_madvise(
 2105         vm_map_t map,
 2106         vm_offset_t start,
 2107         vm_offset_t end,
 2108         int behav)
 2109 {
 2110         vm_map_entry_t current, entry;
 2111         int modify_map = 0;
 2112 
 2113         /*
 2114          * Some madvise calls directly modify the vm_map_entry, in which case
 2115          * we need to use an exclusive lock on the map and we need to perform
 2116          * various clipping operations.  Otherwise we only need a read-lock
 2117          * on the map.
 2118          */
 2119         switch(behav) {
 2120         case MADV_NORMAL:
 2121         case MADV_SEQUENTIAL:
 2122         case MADV_RANDOM:
 2123         case MADV_NOSYNC:
 2124         case MADV_AUTOSYNC:
 2125         case MADV_NOCORE:
 2126         case MADV_CORE:
 2127                 if (start == end)
 2128                         return (KERN_SUCCESS);
 2129                 modify_map = 1;
 2130                 vm_map_lock(map);
 2131                 break;
 2132         case MADV_WILLNEED:
 2133         case MADV_DONTNEED:
 2134         case MADV_FREE:
 2135                 if (start == end)
 2136                         return (KERN_SUCCESS);
 2137                 vm_map_lock_read(map);
 2138                 break;
 2139         default:
 2140                 return (KERN_INVALID_ARGUMENT);
 2141         }
 2142 
 2143         /*
 2144          * Locate starting entry and clip if necessary.
 2145          */
 2146         VM_MAP_RANGE_CHECK(map, start, end);
 2147 
 2148         if (vm_map_lookup_entry(map, start, &entry)) {
 2149                 if (modify_map)
 2150                         vm_map_clip_start(map, entry, start);
 2151         } else {
 2152                 entry = entry->next;
 2153         }
 2154 
 2155         if (modify_map) {
 2156                 /*
 2157                  * madvise behaviors that are implemented in the vm_map_entry.
 2158                  *
 2159                  * We clip the vm_map_entry so that behavioral changes are
 2160                  * limited to the specified address range.
 2161                  */
 2162                 for (current = entry;
 2163                      (current != &map->header) && (current->start < end);
 2164                      current = current->next
 2165                 ) {
 2166                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
 2167                                 continue;
 2168 
 2169                         vm_map_clip_end(map, current, end);
 2170 
 2171                         switch (behav) {
 2172                         case MADV_NORMAL:
 2173                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
 2174                                 break;
 2175                         case MADV_SEQUENTIAL:
 2176                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
 2177                                 break;
 2178                         case MADV_RANDOM:
 2179                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
 2180                                 break;
 2181                         case MADV_NOSYNC:
 2182                                 current->eflags |= MAP_ENTRY_NOSYNC;
 2183                                 break;
 2184                         case MADV_AUTOSYNC:
 2185                                 current->eflags &= ~MAP_ENTRY_NOSYNC;
 2186                                 break;
 2187                         case MADV_NOCORE:
 2188                                 current->eflags |= MAP_ENTRY_NOCOREDUMP;
 2189                                 break;
 2190                         case MADV_CORE:
 2191                                 current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
 2192                                 break;
 2193                         default:
 2194                                 break;
 2195                         }
 2196                         vm_map_simplify_entry(map, current);
 2197                 }
 2198                 vm_map_unlock(map);
 2199         } else {
 2200                 vm_pindex_t pstart, pend;
 2201 
 2202                 /*
 2203                  * madvise behaviors that are implemented in the underlying
 2204                  * vm_object.
 2205                  *
 2206                  * Since we don't clip the vm_map_entry, we have to clip
 2207                  * the vm_object pindex and count.
 2208                  */
 2209                 for (current = entry;
 2210                      (current != &map->header) && (current->start < end);
 2211                      current = current->next
 2212                 ) {
 2213                         vm_offset_t useEnd, useStart;
 2214 
 2215                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
 2216                                 continue;
 2217 
 2218                         pstart = OFF_TO_IDX(current->offset);
 2219                         pend = pstart + atop(current->end - current->start);
 2220                         useStart = current->start;
 2221                         useEnd = current->end;
 2222 
 2223                         if (current->start < start) {
 2224                                 pstart += atop(start - current->start);
 2225                                 useStart = start;
 2226                         }
 2227                         if (current->end > end) {
 2228                                 pend -= atop(current->end - end);
 2229                                 useEnd = end;
 2230                         }
 2231 
 2232                         if (pstart >= pend)
 2233                                 continue;
 2234 
 2235                         /*
 2236                          * Perform the pmap_advise() before clearing
 2237                          * PGA_REFERENCED in vm_page_advise().  Otherwise, a
 2238                          * concurrent pmap operation, such as pmap_remove(),
 2239                          * could clear a reference in the pmap and set
 2240                          * PGA_REFERENCED on the page before the pmap_advise()
 2241                          * had completed.  Consequently, the page would appear
 2242                          * referenced based upon an old reference that
 2243                          * occurred before this pmap_advise() ran.
 2244                          */
 2245                         if (behav == MADV_DONTNEED || behav == MADV_FREE)
 2246                                 pmap_advise(map->pmap, useStart, useEnd,
 2247                                     behav);
 2248 
 2249                         vm_object_madvise(current->object.vm_object, pstart,
 2250                             pend, behav);
 2251 
 2252                         /*
 2253                          * Pre-populate paging structures in the
 2254                          * WILLNEED case.  For wired entries, the
 2255                          * paging structures are already populated.
 2256                          */
 2257                         if (behav == MADV_WILLNEED &&
 2258                             current->wired_count == 0) {
 2259                                 vm_map_pmap_enter(map,
 2260                                     useStart,
 2261                                     current->protection,
 2262                                     current->object.vm_object,
 2263                                     pstart,
 2264                                     ptoa(pend - pstart),
 2265                                     MAP_PREFAULT_MADVISE
 2266                                 );
 2267                         }
 2268                 }
 2269                 vm_map_unlock_read(map);
 2270         }
 2271         return (0);
 2272 }
 2273 
 2274 
 2275 /*
 2276  *      vm_map_inherit:
 2277  *
 2278  *      Sets the inheritance of the specified address
 2279  *      range in the target map.  Inheritance
 2280  *      affects how the map will be shared with
 2281  *      child maps at the time of vmspace_fork.
 2282  */
 2283 int
 2284 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
 2285                vm_inherit_t new_inheritance)
 2286 {
 2287         vm_map_entry_t entry;
 2288         vm_map_entry_t temp_entry;
 2289 
 2290         switch (new_inheritance) {
 2291         case VM_INHERIT_NONE:
 2292         case VM_INHERIT_COPY:
 2293         case VM_INHERIT_SHARE:
 2294         case VM_INHERIT_ZERO:
 2295                 break;
 2296         default:
 2297                 return (KERN_INVALID_ARGUMENT);
 2298         }
 2299         if (start == end)
 2300                 return (KERN_SUCCESS);
 2301         vm_map_lock(map);
 2302         VM_MAP_RANGE_CHECK(map, start, end);
 2303         if (vm_map_lookup_entry(map, start, &temp_entry)) {
 2304                 entry = temp_entry;
 2305                 vm_map_clip_start(map, entry, start);
 2306         } else
 2307                 entry = temp_entry->next;
 2308         while ((entry != &map->header) && (entry->start < end)) {
 2309                 vm_map_clip_end(map, entry, end);
 2310                 if ((entry->eflags & MAP_ENTRY_GUARD) == 0 ||
 2311                     new_inheritance != VM_INHERIT_ZERO)
 2312                         entry->inheritance = new_inheritance;
 2313                 vm_map_simplify_entry(map, entry);
 2314                 entry = entry->next;
 2315         }
 2316         vm_map_unlock(map);
 2317         return (KERN_SUCCESS);
 2318 }
 2319 
 2320 /*
 2321  *      vm_map_unwire:
 2322  *
 2323  *      Implements both kernel and user unwiring.
 2324  */
 2325 int
 2326 vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
 2327     int flags)
 2328 {
 2329         vm_map_entry_t entry, first_entry, tmp_entry;
 2330         vm_offset_t saved_start;
 2331         unsigned int last_timestamp;
 2332         int rv;
 2333         boolean_t need_wakeup, result, user_unwire;
 2334 
 2335         if (start == end)
 2336                 return (KERN_SUCCESS);
 2337         user_unwire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE;
 2338         vm_map_lock(map);
 2339         VM_MAP_RANGE_CHECK(map, start, end);
 2340         if (!vm_map_lookup_entry(map, start, &first_entry)) {
 2341                 if (flags & VM_MAP_WIRE_HOLESOK)
 2342                         first_entry = first_entry->next;
 2343                 else {
 2344                         vm_map_unlock(map);
 2345                         return (KERN_INVALID_ADDRESS);
 2346                 }
 2347         }
 2348         last_timestamp = map->timestamp;
 2349         entry = first_entry;
 2350         while (entry != &map->header && entry->start < end) {
 2351                 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
 2352                         /*
 2353                          * We have not yet clipped the entry.
 2354                          */
 2355                         saved_start = (start >= entry->start) ? start :
 2356                             entry->start;
 2357                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 2358                         if (vm_map_unlock_and_wait(map, 0)) {
 2359                                 /*
 2360                                  * Allow interruption of user unwiring?
 2361                                  */
 2362                         }
 2363                         vm_map_lock(map);
 2364                         if (last_timestamp+1 != map->timestamp) {
 2365                                 /*
 2366                                  * Look again for the entry because the map was
 2367                                  * modified while it was unlocked.
 2368                                  * Specifically, the entry may have been
 2369                                  * clipped, merged, or deleted.
 2370                                  */
 2371                                 if (!vm_map_lookup_entry(map, saved_start,
 2372                                     &tmp_entry)) {
 2373                                         if (flags & VM_MAP_WIRE_HOLESOK)
 2374                                                 tmp_entry = tmp_entry->next;
 2375                                         else {
 2376                                                 if (saved_start == start) {
 2377                                                         /*
 2378                                                          * First_entry has been deleted.
 2379                                                          */
 2380                                                         vm_map_unlock(map);
 2381                                                         return (KERN_INVALID_ADDRESS);
 2382                                                 }
 2383                                                 end = saved_start;
 2384                                                 rv = KERN_INVALID_ADDRESS;
 2385                                                 goto done;
 2386                                         }
 2387                                 }
 2388                                 if (entry == first_entry)
 2389                                         first_entry = tmp_entry;
 2390                                 else
 2391                                         first_entry = NULL;
 2392                                 entry = tmp_entry;
 2393                         }
 2394                         last_timestamp = map->timestamp;
 2395                         continue;
 2396                 }
 2397                 vm_map_clip_start(map, entry, start);
 2398                 vm_map_clip_end(map, entry, end);
 2399                 /*
 2400                  * Mark the entry in case the map lock is released.  (See
 2401                  * above.)
 2402                  */
 2403                 KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 &&
 2404                     entry->wiring_thread == NULL,
 2405                     ("owned map entry %p", entry));
 2406                 entry->eflags |= MAP_ENTRY_IN_TRANSITION;
 2407                 entry->wiring_thread = curthread;
 2408                 /*
 2409                  * Check the map for holes in the specified region.
 2410                  * If VM_MAP_WIRE_HOLESOK was specified, skip this check.
 2411                  */
 2412                 if (((flags & VM_MAP_WIRE_HOLESOK) == 0) &&
 2413                     (entry->end < end && (entry->next == &map->header ||
 2414                     entry->next->start > entry->end))) {
 2415                         end = entry->end;
 2416                         rv = KERN_INVALID_ADDRESS;
 2417                         goto done;
 2418                 }
 2419                 /*
 2420                  * If system unwiring, require that the entry is system wired.
 2421                  */
 2422                 if (!user_unwire &&
 2423                     vm_map_entry_system_wired_count(entry) == 0) {
 2424                         end = entry->end;
 2425                         rv = KERN_INVALID_ARGUMENT;
 2426                         goto done;
 2427                 }
 2428                 entry = entry->next;
 2429         }
 2430         rv = KERN_SUCCESS;
 2431 done:
 2432         need_wakeup = FALSE;
 2433         if (first_entry == NULL) {
 2434                 result = vm_map_lookup_entry(map, start, &first_entry);
 2435                 if (!result && (flags & VM_MAP_WIRE_HOLESOK))
 2436                         first_entry = first_entry->next;
 2437                 else
 2438                         KASSERT(result, ("vm_map_unwire: lookup failed"));
 2439         }
 2440         for (entry = first_entry; entry != &map->header && entry->start < end;
 2441             entry = entry->next) {
 2442                 /*
 2443                  * If VM_MAP_WIRE_HOLESOK was specified, an empty
 2444                  * space in the unwired region could have been mapped
 2445                  * while the map lock was dropped for draining
 2446                  * MAP_ENTRY_IN_TRANSITION.  Moreover, another thread
 2447                  * could be simultaneously wiring this new mapping
 2448                  * entry.  Detect these cases and skip any entries
 2449                  * marked as in transition by us.
 2450                  */
 2451                 if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 ||
 2452                     entry->wiring_thread != curthread) {
 2453                         KASSERT((flags & VM_MAP_WIRE_HOLESOK) != 0,
 2454                             ("vm_map_unwire: !HOLESOK and new/changed entry"));
 2455                         continue;
 2456                 }
 2457 
 2458                 if (rv == KERN_SUCCESS && (!user_unwire ||
 2459                     (entry->eflags & MAP_ENTRY_USER_WIRED))) {
 2460                         if (user_unwire)
 2461                                 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 2462                         if (entry->wired_count == 1)
 2463                                 vm_map_entry_unwire(map, entry);
 2464                         else
 2465                                 entry->wired_count--;
 2466                 }
 2467                 KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0,
 2468                     ("vm_map_unwire: in-transition flag missing %p", entry));
 2469                 KASSERT(entry->wiring_thread == curthread,
 2470                     ("vm_map_unwire: alien wire %p", entry));
 2471                 entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
 2472                 entry->wiring_thread = NULL;
 2473                 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
 2474                         entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
 2475                         need_wakeup = TRUE;
 2476                 }
 2477                 vm_map_simplify_entry(map, entry);
 2478         }
 2479         vm_map_unlock(map);
 2480         if (need_wakeup)
 2481                 vm_map_wakeup(map);
 2482         return (rv);
 2483 }
 2484 
 2485 /*
 2486  *      vm_map_wire_entry_failure:
 2487  *
 2488  *      Handle a wiring failure on the given entry.
 2489  *
 2490  *      The map should be locked.
 2491  */
 2492 static void
 2493 vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry,
 2494     vm_offset_t failed_addr)
 2495 {
 2496 
 2497         VM_MAP_ASSERT_LOCKED(map);
 2498         KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 &&
 2499             entry->wired_count == 1,
 2500             ("vm_map_wire_entry_failure: entry %p isn't being wired", entry));
 2501         KASSERT(failed_addr < entry->end,
 2502             ("vm_map_wire_entry_failure: entry %p was fully wired", entry));
 2503 
 2504         /*
 2505          * If any pages at the start of this entry were successfully wired,
 2506          * then unwire them.
 2507          */
 2508         if (failed_addr > entry->start) {
 2509                 pmap_unwire(map->pmap, entry->start, failed_addr);
 2510                 vm_object_unwire(entry->object.vm_object, entry->offset,
 2511                     failed_addr - entry->start, PQ_ACTIVE);
 2512         }
 2513 
 2514         /*
 2515          * Assign an out-of-range value to represent the failure to wire this
 2516          * entry.
 2517          */
 2518         entry->wired_count = -1;
 2519 }
 2520 
 2521 /*
 2522  *      vm_map_wire:
 2523  *
 2524  *      Implements both kernel and user wiring.
 2525  */
 2526 int
 2527 vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,
 2528     int flags)
 2529 {
 2530         vm_map_entry_t entry, first_entry, tmp_entry;
 2531         vm_offset_t faddr, saved_end, saved_start;
 2532         unsigned int last_timestamp;
 2533         int rv;
 2534         boolean_t need_wakeup, result, user_wire;
 2535         vm_prot_t prot;
 2536 
 2537         if (start == end)
 2538                 return (KERN_SUCCESS);
 2539         prot = 0;
 2540         if (flags & VM_MAP_WIRE_WRITE)
 2541                 prot |= VM_PROT_WRITE;
 2542         user_wire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE;
 2543         vm_map_lock(map);
 2544         VM_MAP_RANGE_CHECK(map, start, end);
 2545         if (!vm_map_lookup_entry(map, start, &first_entry)) {
 2546                 if (flags & VM_MAP_WIRE_HOLESOK)
 2547                         first_entry = first_entry->next;
 2548                 else {
 2549                         vm_map_unlock(map);
 2550                         return (KERN_INVALID_ADDRESS);
 2551                 }
 2552         }
 2553         last_timestamp = map->timestamp;
 2554         entry = first_entry;
 2555         while (entry != &map->header && entry->start < end) {
 2556                 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
 2557                         /*
 2558                          * We have not yet clipped the entry.
 2559                          */
 2560                         saved_start = (start >= entry->start) ? start :
 2561                             entry->start;
 2562                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 2563                         if (vm_map_unlock_and_wait(map, 0)) {
 2564                                 /*
 2565                                  * Allow interruption of user wiring?
 2566                                  */
 2567                         }
 2568                         vm_map_lock(map);
 2569                         if (last_timestamp + 1 != map->timestamp) {
 2570                                 /*
 2571                                  * Look again for the entry because the map was
 2572                                  * modified while it was unlocked.
 2573                                  * Specifically, the entry may have been
 2574                                  * clipped, merged, or deleted.
 2575                                  */
 2576                                 if (!vm_map_lookup_entry(map, saved_start,
 2577                                     &tmp_entry)) {
 2578                                         if (flags & VM_MAP_WIRE_HOLESOK)
 2579                                                 tmp_entry = tmp_entry->next;
 2580                                         else {
 2581                                                 if (saved_start == start) {
 2582                                                         /*
 2583                                                          * first_entry has been deleted.
 2584                                                          */
 2585                                                         vm_map_unlock(map);
 2586                                                         return (KERN_INVALID_ADDRESS);
 2587                                                 }
 2588                                                 end = saved_start;
 2589                                                 rv = KERN_INVALID_ADDRESS;
 2590                                                 goto done;
 2591                                         }
 2592                                 }
 2593                                 if (entry == first_entry)
 2594                                         first_entry = tmp_entry;
 2595                                 else
 2596                                         first_entry = NULL;
 2597                                 entry = tmp_entry;
 2598                         }
 2599                         last_timestamp = map->timestamp;
 2600                         continue;
 2601                 }
 2602                 vm_map_clip_start(map, entry, start);
 2603                 vm_map_clip_end(map, entry, end);
 2604                 /*
 2605                  * Mark the entry in case the map lock is released.  (See
 2606                  * above.)
 2607                  */
 2608                 KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 &&
 2609                     entry->wiring_thread == NULL,
 2610                     ("owned map entry %p", entry));
 2611                 entry->eflags |= MAP_ENTRY_IN_TRANSITION;
 2612                 entry->wiring_thread = curthread;
 2613                 if ((entry->protection & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0
 2614                     || (entry->protection & prot) != prot) {
 2615                         entry->eflags |= MAP_ENTRY_WIRE_SKIPPED;
 2616                         if ((flags & VM_MAP_WIRE_HOLESOK) == 0) {
 2617                                 end = entry->end;
 2618                                 rv = KERN_INVALID_ADDRESS;
 2619                                 goto done;
 2620                         }
 2621                         goto next_entry;
 2622                 }
 2623                 if (entry->wired_count == 0) {
 2624                         entry->wired_count++;
 2625                         saved_start = entry->start;
 2626                         saved_end = entry->end;
 2627 
 2628                         /*
 2629                          * Release the map lock, relying on the in-transition
 2630                          * mark.  Mark the map busy for fork.
 2631                          */
 2632                         vm_map_busy(map);
 2633                         vm_map_unlock(map);
 2634 
 2635                         faddr = saved_start;
 2636                         do {
 2637                                 /*
 2638                                  * Simulate a fault to get the page and enter
 2639                                  * it into the physical map.
 2640                                  */
 2641                                 if ((rv = vm_fault(map, faddr, VM_PROT_NONE,
 2642                                     VM_FAULT_WIRE)) != KERN_SUCCESS)
 2643                                         break;
 2644                         } while ((faddr += PAGE_SIZE) < saved_end);
 2645                         vm_map_lock(map);
 2646                         vm_map_unbusy(map);
 2647                         if (last_timestamp + 1 != map->timestamp) {
 2648                                 /*
 2649                                  * Look again for the entry because the map was
 2650                                  * modified while it was unlocked.  The entry
 2651                                  * may have been clipped, but NOT merged or
 2652                                  * deleted.
 2653                                  */
 2654                                 result = vm_map_lookup_entry(map, saved_start,
 2655                                     &tmp_entry);
 2656                                 KASSERT(result, ("vm_map_wire: lookup failed"));
 2657                                 if (entry == first_entry)
 2658                                         first_entry = tmp_entry;
 2659                                 else
 2660                                         first_entry = NULL;
 2661                                 entry = tmp_entry;
 2662                                 while (entry->end < saved_end) {
 2663                                         /*
 2664                                          * In case of failure, handle entries
 2665                                          * that were not fully wired here;
 2666                                          * fully wired entries are handled
 2667                                          * later.
 2668                                          */
 2669                                         if (rv != KERN_SUCCESS &&
 2670                                             faddr < entry->end)
 2671                                                 vm_map_wire_entry_failure(map,
 2672                                                     entry, faddr);
 2673                                         entry = entry->next;
 2674                                 }
 2675                         }
 2676                         last_timestamp = map->timestamp;
 2677                         if (rv != KERN_SUCCESS) {
 2678                                 vm_map_wire_entry_failure(map, entry, faddr);
 2679                                 end = entry->end;
 2680                                 goto done;
 2681                         }
 2682                 } else if (!user_wire ||
 2683                            (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
 2684                         entry->wired_count++;
 2685                 }
 2686                 /*
 2687                  * Check the map for holes in the specified region.
 2688                  * If VM_MAP_WIRE_HOLESOK was specified, skip this check.
 2689                  */
 2690         next_entry:
 2691                 if ((flags & VM_MAP_WIRE_HOLESOK) == 0 &&
 2692                     entry->end < end && (entry->next == &map->header ||
 2693                     entry->next->start > entry->end)) {
 2694                         end = entry->end;
 2695                         rv = KERN_INVALID_ADDRESS;
 2696                         goto done;
 2697                 }
 2698                 entry = entry->next;
 2699         }
 2700         rv = KERN_SUCCESS;
 2701 done:
 2702         need_wakeup = FALSE;
 2703         if (first_entry == NULL) {
 2704                 result = vm_map_lookup_entry(map, start, &first_entry);
 2705                 if (!result && (flags & VM_MAP_WIRE_HOLESOK))
 2706                         first_entry = first_entry->next;
 2707                 else
 2708                         KASSERT(result, ("vm_map_wire: lookup failed"));
 2709         }
 2710         for (entry = first_entry; entry != &map->header && entry->start < end;
 2711             entry = entry->next) {
 2712                 /*
 2713                  * If VM_MAP_WIRE_HOLESOK was specified, an empty
 2714                  * space in the unwired region could have been mapped
 2715                  * while the map lock was dropped for faulting in the
 2716                  * pages or draining MAP_ENTRY_IN_TRANSITION.
 2717                  * Moreover, another thread could be simultaneously
 2718                  * wiring this new mapping entry.  Detect these cases
 2719                  * and skip any entries marked as in transition not by us.
 2720                  */
 2721                 if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 ||
 2722                     entry->wiring_thread != curthread) {
 2723                         KASSERT((flags & VM_MAP_WIRE_HOLESOK) != 0,
 2724                             ("vm_map_wire: !HOLESOK and new/changed entry"));
 2725                         continue;
 2726                 }
 2727 
 2728                 if ((entry->eflags & MAP_ENTRY_WIRE_SKIPPED) != 0)
 2729                         goto next_entry_done;
 2730 
 2731                 if (rv == KERN_SUCCESS) {
 2732                         if (user_wire)
 2733                                 entry->eflags |= MAP_ENTRY_USER_WIRED;
 2734                 } else if (entry->wired_count == -1) {
 2735                         /*
 2736                          * Wiring failed on this entry.  Thus, unwiring is
 2737                          * unnecessary.
 2738                          */
 2739                         entry->wired_count = 0;
 2740                 } else if (!user_wire ||
 2741                     (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
 2742                         /*
 2743                          * Undo the wiring.  Wiring succeeded on this entry
 2744                          * but failed on a later entry.  
 2745                          */
 2746                         if (entry->wired_count == 1)
 2747                                 vm_map_entry_unwire(map, entry);
 2748                         else
 2749                                 entry->wired_count--;
 2750                 }
 2751         next_entry_done:
 2752                 KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0,
 2753                     ("vm_map_wire: in-transition flag missing %p", entry));
 2754                 KASSERT(entry->wiring_thread == curthread,
 2755                     ("vm_map_wire: alien wire %p", entry));
 2756                 entry->eflags &= ~(MAP_ENTRY_IN_TRANSITION |
 2757                     MAP_ENTRY_WIRE_SKIPPED);
 2758                 entry->wiring_thread = NULL;
 2759                 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
 2760                         entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
 2761                         need_wakeup = TRUE;
 2762                 }
 2763                 vm_map_simplify_entry(map, entry);
 2764         }
 2765         vm_map_unlock(map);
 2766         if (need_wakeup)
 2767                 vm_map_wakeup(map);
 2768         return (rv);
 2769 }
 2770 
 2771 /*
 2772  * vm_map_sync
 2773  *
 2774  * Push any dirty cached pages in the address range to their pager.
 2775  * If syncio is TRUE, dirty pages are written synchronously.
 2776  * If invalidate is TRUE, any cached pages are freed as well.
 2777  *
 2778  * If the size of the region from start to end is zero, we are
 2779  * supposed to flush all modified pages within the region containing
 2780  * start.  Unfortunately, a region can be split or coalesced with
 2781  * neighboring regions, making it difficult to determine what the
 2782  * original region was.  Therefore, we approximate this requirement by
 2783  * flushing the current region containing start.
 2784  *
 2785  * Returns an error if any part of the specified range is not mapped.
 2786  */
 2787 int
 2788 vm_map_sync(
 2789         vm_map_t map,
 2790         vm_offset_t start,
 2791         vm_offset_t end,
 2792         boolean_t syncio,
 2793         boolean_t invalidate)
 2794 {
 2795         vm_map_entry_t current;
 2796         vm_map_entry_t entry;
 2797         vm_size_t size;
 2798         vm_object_t object;
 2799         vm_ooffset_t offset;
 2800         unsigned int last_timestamp;
 2801         boolean_t failed;
 2802 
 2803         vm_map_lock_read(map);
 2804         VM_MAP_RANGE_CHECK(map, start, end);
 2805         if (!vm_map_lookup_entry(map, start, &entry)) {
 2806                 vm_map_unlock_read(map);
 2807                 return (KERN_INVALID_ADDRESS);
 2808         } else if (start == end) {
 2809                 start = entry->start;
 2810                 end = entry->end;
 2811         }
 2812         /*
 2813          * Make a first pass to check for user-wired memory and holes.
 2814          */
 2815         for (current = entry; current != &map->header && current->start < end;
 2816             current = current->next) {
 2817                 if (invalidate && (current->eflags & MAP_ENTRY_USER_WIRED)) {
 2818                         vm_map_unlock_read(map);
 2819                         return (KERN_INVALID_ARGUMENT);
 2820                 }
 2821                 if (end > current->end &&
 2822                     (current->next == &map->header ||
 2823                         current->end != current->next->start)) {
 2824                         vm_map_unlock_read(map);
 2825                         return (KERN_INVALID_ADDRESS);
 2826                 }
 2827         }
 2828 
 2829         if (invalidate)
 2830                 pmap_remove(map->pmap, start, end);
 2831         failed = FALSE;
 2832 
 2833         /*
 2834          * Make a second pass, cleaning/uncaching pages from the indicated
 2835          * objects as we go.
 2836          */
 2837         for (current = entry; current != &map->header && current->start < end;) {
 2838                 offset = current->offset + (start - current->start);
 2839                 size = (end <= current->end ? end : current->end) - start;
 2840                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
 2841                         vm_map_t smap;
 2842                         vm_map_entry_t tentry;
 2843                         vm_size_t tsize;
 2844 
 2845                         smap = current->object.sub_map;
 2846                         vm_map_lock_read(smap);
 2847                         (void) vm_map_lookup_entry(smap, offset, &tentry);
 2848                         tsize = tentry->end - offset;
 2849                         if (tsize < size)
 2850                                 size = tsize;
 2851                         object = tentry->object.vm_object;
 2852                         offset = tentry->offset + (offset - tentry->start);
 2853                         vm_map_unlock_read(smap);
 2854                 } else {
 2855                         object = current->object.vm_object;
 2856                 }
 2857                 vm_object_reference(object);
 2858                 last_timestamp = map->timestamp;
 2859                 vm_map_unlock_read(map);
 2860                 if (!vm_object_sync(object, offset, size, syncio, invalidate))
 2861                         failed = TRUE;
 2862                 start += size;
 2863                 vm_object_deallocate(object);
 2864                 vm_map_lock_read(map);
 2865                 if (last_timestamp == map->timestamp ||
 2866                     !vm_map_lookup_entry(map, start, &current))
 2867                         current = current->next;
 2868         }
 2869 
 2870         vm_map_unlock_read(map);
 2871         return (failed ? KERN_FAILURE : KERN_SUCCESS);
 2872 }
 2873 
 2874 /*
 2875  *      vm_map_entry_unwire:    [ internal use only ]
 2876  *
 2877  *      Make the region specified by this entry pageable.
 2878  *
 2879  *      The map in question should be locked.
 2880  *      [This is the reason for this routine's existence.]
 2881  */
 2882 static void
 2883 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry)
 2884 {
 2885 
 2886         VM_MAP_ASSERT_LOCKED(map);
 2887         KASSERT(entry->wired_count > 0,
 2888             ("vm_map_entry_unwire: entry %p isn't wired", entry));
 2889         pmap_unwire(map->pmap, entry->start, entry->end);
 2890         vm_object_unwire(entry->object.vm_object, entry->offset, entry->end -
 2891             entry->start, PQ_ACTIVE);
 2892         entry->wired_count = 0;
 2893 }
 2894 
 2895 static void
 2896 vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map)
 2897 {
 2898 
 2899         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0)
 2900                 vm_object_deallocate(entry->object.vm_object);
 2901         uma_zfree(system_map ? kmapentzone : mapentzone, entry);
 2902 }
 2903 
 2904 /*
 2905  *      vm_map_entry_delete:    [ internal use only ]
 2906  *
 2907  *      Deallocate the given entry from the target map.
 2908  */
 2909 static void
 2910 vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
 2911 {
 2912         vm_object_t object;
 2913         vm_pindex_t offidxstart, offidxend, count, size1;
 2914         vm_ooffset_t size;
 2915 
 2916         vm_map_entry_unlink(map, entry);
 2917         object = entry->object.vm_object;
 2918 
 2919         if ((entry->eflags & MAP_ENTRY_GUARD) != 0) {
 2920                 MPASS(entry->cred == NULL);
 2921                 MPASS((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0);
 2922                 MPASS(object == NULL);
 2923                 vm_map_entry_deallocate(entry, map->system_map);
 2924                 return;
 2925         }
 2926 
 2927         size = entry->end - entry->start;
 2928         map->size -= size;
 2929 
 2930         if (entry->cred != NULL) {
 2931                 swap_release_by_cred(size, entry->cred);
 2932                 crfree(entry->cred);
 2933         }
 2934 
 2935         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
 2936             (object != NULL)) {
 2937                 KASSERT(entry->cred == NULL || object->cred == NULL ||
 2938                     (entry->eflags & MAP_ENTRY_NEEDS_COPY),
 2939                     ("OVERCOMMIT vm_map_entry_delete: both cred %p", entry));
 2940                 count = OFF_TO_IDX(size);
 2941                 offidxstart = OFF_TO_IDX(entry->offset);
 2942                 offidxend = offidxstart + count;
 2943                 VM_OBJECT_WLOCK(object);
 2944                 if (object->ref_count != 1 && ((object->flags & (OBJ_NOSPLIT |
 2945                     OBJ_ONEMAPPING)) == OBJ_ONEMAPPING ||
 2946                     object == kernel_object || object == kmem_object)) {
 2947                         vm_object_collapse(object);
 2948 
 2949                         /*
 2950                          * The option OBJPR_NOTMAPPED can be passed here
 2951                          * because vm_map_delete() already performed
 2952                          * pmap_remove() on the only mapping to this range
 2953                          * of pages. 
 2954                          */
 2955                         vm_object_page_remove(object, offidxstart, offidxend,
 2956                             OBJPR_NOTMAPPED);
 2957                         if (object->type == OBJT_SWAP)
 2958                                 swap_pager_freespace(object, offidxstart,
 2959                                     count);
 2960                         if (offidxend >= object->size &&
 2961                             offidxstart < object->size) {
 2962                                 size1 = object->size;
 2963                                 object->size = offidxstart;
 2964                                 if (object->cred != NULL) {
 2965                                         size1 -= object->size;
 2966                                         KASSERT(object->charge >= ptoa(size1),
 2967                                             ("object %p charge < 0", object));
 2968                                         swap_release_by_cred(ptoa(size1),
 2969                                             object->cred);
 2970                                         object->charge -= ptoa(size1);
 2971                                 }
 2972                         }
 2973                 }
 2974                 VM_OBJECT_WUNLOCK(object);
 2975         } else
 2976                 entry->object.vm_object = NULL;
 2977         if (map->system_map)
 2978                 vm_map_entry_deallocate(entry, TRUE);
 2979         else {
 2980                 entry->next = curthread->td_map_def_user;
 2981                 curthread->td_map_def_user = entry;
 2982         }
 2983 }
 2984 
 2985 /*
 2986  *      vm_map_delete:  [ internal use only ]
 2987  *
 2988  *      Deallocates the given address range from the target
 2989  *      map.
 2990  */
 2991 int
 2992 vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
 2993 {
 2994         vm_map_entry_t entry;
 2995         vm_map_entry_t first_entry;
 2996 
 2997         VM_MAP_ASSERT_LOCKED(map);
 2998         if (start == end)
 2999                 return (KERN_SUCCESS);
 3000 
 3001         /*
 3002          * Find the start of the region, and clip it
 3003          */
 3004         if (!vm_map_lookup_entry(map, start, &first_entry))
 3005                 entry = first_entry->next;
 3006         else {
 3007                 entry = first_entry;
 3008                 vm_map_clip_start(map, entry, start);
 3009         }
 3010 
 3011         /*
 3012          * Step through all entries in this region
 3013          */
 3014         while ((entry != &map->header) && (entry->start < end)) {
 3015                 vm_map_entry_t next;
 3016 
 3017                 /*
 3018                  * Wait for wiring or unwiring of an entry to complete.
 3019                  * Also wait for any system wirings to disappear on
 3020                  * user maps.
 3021                  */
 3022                 if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 ||
 3023                     (vm_map_pmap(map) != kernel_pmap &&
 3024                     vm_map_entry_system_wired_count(entry) != 0)) {
 3025                         unsigned int last_timestamp;
 3026                         vm_offset_t saved_start;
 3027                         vm_map_entry_t tmp_entry;
 3028 
 3029                         saved_start = entry->start;
 3030                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 3031                         last_timestamp = map->timestamp;
 3032                         (void) vm_map_unlock_and_wait(map, 0);
 3033                         vm_map_lock(map);
 3034                         if (last_timestamp + 1 != map->timestamp) {
 3035                                 /*
 3036                                  * Look again for the entry because the map was
 3037                                  * modified while it was unlocked.
 3038                                  * Specifically, the entry may have been
 3039                                  * clipped, merged, or deleted.
 3040                                  */
 3041                                 if (!vm_map_lookup_entry(map, saved_start,
 3042                                                          &tmp_entry))
 3043                                         entry = tmp_entry->next;
 3044                                 else {
 3045                                         entry = tmp_entry;
 3046                                         vm_map_clip_start(map, entry,
 3047                                                           saved_start);
 3048                                 }
 3049                         }
 3050                         continue;
 3051                 }
 3052                 vm_map_clip_end(map, entry, end);
 3053 
 3054                 next = entry->next;
 3055 
 3056                 /*
 3057                  * Unwire before removing addresses from the pmap; otherwise,
 3058                  * unwiring will put the entries back in the pmap.
 3059                  */
 3060                 if (entry->wired_count != 0) {
 3061                         vm_map_entry_unwire(map, entry);
 3062                 }
 3063 
 3064                 pmap_remove(map->pmap, entry->start, entry->end);
 3065 
 3066                 /*
 3067                  * Delete the entry only after removing all pmap
 3068                  * entries pointing to its pages.  (Otherwise, its
 3069                  * page frames may be reallocated, and any modify bits
 3070                  * will be set in the wrong object!)
 3071                  */
 3072                 vm_map_entry_delete(map, entry);
 3073                 entry = next;
 3074         }
 3075         return (KERN_SUCCESS);
 3076 }
 3077 
 3078 /*
 3079  *      vm_map_remove:
 3080  *
 3081  *      Remove the given address range from the target map.
 3082  *      This is the exported form of vm_map_delete.
 3083  */
 3084 int
 3085 vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end)
 3086 {
 3087         int result;
 3088 
 3089         vm_map_lock(map);
 3090         VM_MAP_RANGE_CHECK(map, start, end);
 3091         result = vm_map_delete(map, start, end);
 3092         vm_map_unlock(map);
 3093         return (result);
 3094 }
 3095 
 3096 /*
 3097  *      vm_map_check_protection:
 3098  *
 3099  *      Assert that the target map allows the specified privilege on the
 3100  *      entire address region given.  The entire region must be allocated.
 3101  *
 3102  *      WARNING!  This code does not and should not check whether the
 3103  *      contents of the region is accessible.  For example a smaller file
 3104  *      might be mapped into a larger address space.
 3105  *
 3106  *      NOTE!  This code is also called by munmap().
 3107  *
 3108  *      The map must be locked.  A read lock is sufficient.
 3109  */
 3110 boolean_t
 3111 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
 3112                         vm_prot_t protection)
 3113 {
 3114         vm_map_entry_t entry;
 3115         vm_map_entry_t tmp_entry;
 3116 
 3117         if (!vm_map_lookup_entry(map, start, &tmp_entry))
 3118                 return (FALSE);
 3119         entry = tmp_entry;
 3120 
 3121         while (start < end) {
 3122                 if (entry == &map->header)
 3123                         return (FALSE);
 3124                 /*
 3125                  * No holes allowed!
 3126                  */
 3127                 if (start < entry->start)
 3128                         return (FALSE);
 3129                 /*
 3130                  * Check protection associated with entry.
 3131                  */
 3132                 if ((entry->protection & protection) != protection)
 3133                         return (FALSE);
 3134                 /* go to next entry */
 3135                 start = entry->end;
 3136                 entry = entry->next;
 3137         }
 3138         return (TRUE);
 3139 }
 3140 
 3141 /*
 3142  *      vm_map_copy_entry:
 3143  *
 3144  *      Copies the contents of the source entry to the destination
 3145  *      entry.  The entries *must* be aligned properly.
 3146  */
 3147 static void
 3148 vm_map_copy_entry(
 3149         vm_map_t src_map,
 3150         vm_map_t dst_map,
 3151         vm_map_entry_t src_entry,
 3152         vm_map_entry_t dst_entry,
 3153         vm_ooffset_t *fork_charge)
 3154 {
 3155         vm_object_t src_object;
 3156         vm_map_entry_t fake_entry;
 3157         vm_offset_t size;
 3158         struct ucred *cred;
 3159         int charged;
 3160 
 3161         VM_MAP_ASSERT_LOCKED(dst_map);
 3162 
 3163         if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
 3164                 return;
 3165 
 3166         if (src_entry->wired_count == 0 ||
 3167             (src_entry->protection & VM_PROT_WRITE) == 0) {
 3168                 /*
 3169                  * If the source entry is marked needs_copy, it is already
 3170                  * write-protected.
 3171                  */
 3172                 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0 &&
 3173                     (src_entry->protection & VM_PROT_WRITE) != 0) {
 3174                         pmap_protect(src_map->pmap,
 3175                             src_entry->start,
 3176                             src_entry->end,
 3177                             src_entry->protection & ~VM_PROT_WRITE);
 3178                 }
 3179 
 3180                 /*
 3181                  * Make a copy of the object.
 3182                  */
 3183                 size = src_entry->end - src_entry->start;
 3184                 if ((src_object = src_entry->object.vm_object) != NULL) {
 3185                         VM_OBJECT_WLOCK(src_object);
 3186                         charged = ENTRY_CHARGED(src_entry);
 3187                         if (src_object->handle == NULL &&
 3188                             (src_object->type == OBJT_DEFAULT ||
 3189                             src_object->type == OBJT_SWAP)) {
 3190                                 vm_object_collapse(src_object);
 3191                                 if ((src_object->flags & (OBJ_NOSPLIT |
 3192                                     OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
 3193                                         vm_object_split(src_entry);
 3194                                         src_object =
 3195                                             src_entry->object.vm_object;
 3196                                 }
 3197                         }
 3198                         vm_object_reference_locked(src_object);
 3199                         vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
 3200                         if (src_entry->cred != NULL &&
 3201                             !(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) {
 3202                                 KASSERT(src_object->cred == NULL,
 3203                                     ("OVERCOMMIT: vm_map_copy_entry: cred %p",
 3204                                      src_object));
 3205                                 src_object->cred = src_entry->cred;
 3206                                 src_object->charge = size;
 3207                         }
 3208                         VM_OBJECT_WUNLOCK(src_object);
 3209                         dst_entry->object.vm_object = src_object;
 3210                         if (charged) {
 3211                                 cred = curthread->td_ucred;
 3212                                 crhold(cred);
 3213                                 dst_entry->cred = cred;
 3214                                 *fork_charge += size;
 3215                                 if (!(src_entry->eflags &
 3216                                       MAP_ENTRY_NEEDS_COPY)) {
 3217                                         crhold(cred);
 3218                                         src_entry->cred = cred;
 3219                                         *fork_charge += size;
 3220                                 }
 3221                         }
 3222                         src_entry->eflags |= MAP_ENTRY_COW |
 3223                             MAP_ENTRY_NEEDS_COPY;
 3224                         dst_entry->eflags |= MAP_ENTRY_COW |
 3225                             MAP_ENTRY_NEEDS_COPY;
 3226                         dst_entry->offset = src_entry->offset;
 3227                         if (src_entry->eflags & MAP_ENTRY_VN_WRITECNT) {
 3228                                 /*
 3229                                  * MAP_ENTRY_VN_WRITECNT cannot
 3230                                  * indicate write reference from
 3231                                  * src_entry, since the entry is
 3232                                  * marked as needs copy.  Allocate a
 3233                                  * fake entry that is used to
 3234                                  * decrement object->un_pager.vnp.writecount
 3235                                  * at the appropriate time.  Attach
 3236                                  * fake_entry to the deferred list.
 3237                                  */
 3238                                 fake_entry = vm_map_entry_create(dst_map);
 3239                                 fake_entry->eflags = MAP_ENTRY_VN_WRITECNT;
 3240                                 src_entry->eflags &= ~MAP_ENTRY_VN_WRITECNT;
 3241                                 vm_object_reference(src_object);
 3242                                 fake_entry->object.vm_object = src_object;
 3243                                 fake_entry->start = src_entry->start;
 3244                                 fake_entry->end = src_entry->end;
 3245                                 fake_entry->next = curthread->td_map_def_user;
 3246                                 curthread->td_map_def_user = fake_entry;
 3247                         }
 3248 
 3249                         pmap_copy(dst_map->pmap, src_map->pmap,
 3250                             dst_entry->start, dst_entry->end - dst_entry->start,
 3251                             src_entry->start);
 3252                 } else {
 3253                         dst_entry->object.vm_object = NULL;
 3254                         dst_entry->offset = 0;
 3255                         if (src_entry->cred != NULL) {
 3256                                 dst_entry->cred = curthread->td_ucred;
 3257                                 crhold(dst_entry->cred);
 3258                                 *fork_charge += size;
 3259                         }
 3260                 }
 3261         } else {
 3262                 /*
 3263                  * We don't want to make writeable wired pages copy-on-write.
 3264                  * Immediately copy these pages into the new map by simulating
 3265                  * page faults.  The new pages are pageable.
 3266                  */
 3267                 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry,
 3268                     fork_charge);
 3269         }
 3270 }
 3271 
 3272 /*
 3273  * vmspace_map_entry_forked:
 3274  * Update the newly-forked vmspace each time a map entry is inherited
 3275  * or copied.  The values for vm_dsize and vm_tsize are approximate
 3276  * (and mostly-obsolete ideas in the face of mmap(2) et al.)
 3277  */
 3278 static void
 3279 vmspace_map_entry_forked(const struct vmspace *vm1, struct vmspace *vm2,
 3280     vm_map_entry_t entry)
 3281 {
 3282         vm_size_t entrysize;
 3283         vm_offset_t newend;
 3284 
 3285         if ((entry->eflags & MAP_ENTRY_GUARD) != 0)
 3286                 return;
 3287         entrysize = entry->end - entry->start;
 3288         vm2->vm_map.size += entrysize;
 3289         if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) {
 3290                 vm2->vm_ssize += btoc(entrysize);
 3291         } else if (entry->start >= (vm_offset_t)vm1->vm_daddr &&
 3292             entry->start < (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize)) {
 3293                 newend = MIN(entry->end,
 3294                     (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize));
 3295                 vm2->vm_dsize += btoc(newend - entry->start);
 3296         } else if (entry->start >= (vm_offset_t)vm1->vm_taddr &&
 3297             entry->start < (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize)) {
 3298                 newend = MIN(entry->end,
 3299                     (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize));
 3300                 vm2->vm_tsize += btoc(newend - entry->start);
 3301         }
 3302 }
 3303 
 3304 /*
 3305  * vmspace_fork:
 3306  * Create a new process vmspace structure and vm_map
 3307  * based on those of an existing process.  The new map
 3308  * is based on the old map, according to the inheritance
 3309  * values on the regions in that map.
 3310  *
 3311  * XXX It might be worth coalescing the entries added to the new vmspace.
 3312  *
 3313  * The source map must not be locked.
 3314  */
 3315 struct vmspace *
 3316 vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
 3317 {
 3318         struct vmspace *vm2;
 3319         vm_map_t new_map, old_map;
 3320         vm_map_entry_t new_entry, old_entry;
 3321         vm_object_t object;
 3322         int locked;
 3323         vm_inherit_t inh;
 3324 
 3325         old_map = &vm1->vm_map;
 3326         /* Copy immutable fields of vm1 to vm2. */
 3327         vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset, NULL);
 3328         if (vm2 == NULL)
 3329                 return (NULL);
 3330         vm2->vm_taddr = vm1->vm_taddr;
 3331         vm2->vm_daddr = vm1->vm_daddr;
 3332         vm2->vm_maxsaddr = vm1->vm_maxsaddr;
 3333         vm_map_lock(old_map);
 3334         if (old_map->busy)
 3335                 vm_map_wait_busy(old_map);
 3336         new_map = &vm2->vm_map;
 3337         locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */
 3338         KASSERT(locked, ("vmspace_fork: lock failed"));
 3339 
 3340         old_entry = old_map->header.next;
 3341 
 3342         while (old_entry != &old_map->header) {
 3343                 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 3344                         panic("vm_map_fork: encountered a submap");
 3345 
 3346                 inh = old_entry->inheritance;
 3347                 if ((old_entry->eflags & MAP_ENTRY_GUARD) != 0 &&
 3348                     inh != VM_INHERIT_NONE)
 3349                         inh = VM_INHERIT_COPY;
 3350 
 3351                 switch (inh) {
 3352                 case VM_INHERIT_NONE:
 3353                         break;
 3354 
 3355                 case VM_INHERIT_SHARE:
 3356                         /*
 3357                          * Clone the entry, creating the shared object if necessary.
 3358                          */
 3359                         object = old_entry->object.vm_object;
 3360                         if (object == NULL) {
 3361                                 object = vm_object_allocate(OBJT_DEFAULT,
 3362                                         atop(old_entry->end - old_entry->start));
 3363                                 old_entry->object.vm_object = object;
 3364                                 old_entry->offset = 0;
 3365                                 if (old_entry->cred != NULL) {
 3366                                         object->cred = old_entry->cred;
 3367                                         object->charge = old_entry->end -
 3368                                             old_entry->start;
 3369                                         old_entry->cred = NULL;
 3370                                 }
 3371                         }
 3372 
 3373                         /*
 3374                          * Add the reference before calling vm_object_shadow
 3375                          * to insure that a shadow object is created.
 3376                          */
 3377                         vm_object_reference(object);
 3378                         if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
 3379                                 vm_object_shadow(&old_entry->object.vm_object,
 3380                                     &old_entry->offset,
 3381                                     old_entry->end - old_entry->start);
 3382                                 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 3383                                 /* Transfer the second reference too. */
 3384                                 vm_object_reference(
 3385                                     old_entry->object.vm_object);
 3386 
 3387                                 /*
 3388                                  * As in vm_map_simplify_entry(), the
 3389                                  * vnode lock will not be acquired in
 3390                                  * this call to vm_object_deallocate().
 3391                                  */
 3392                                 vm_object_deallocate(object);
 3393                                 object = old_entry->object.vm_object;
 3394                         }
 3395                         VM_OBJECT_WLOCK(object);
 3396                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
 3397                         if (old_entry->cred != NULL) {
 3398                                 KASSERT(object->cred == NULL, ("vmspace_fork both cred"));
 3399                                 object->cred = old_entry->cred;
 3400                                 object->charge = old_entry->end - old_entry->start;
 3401                                 old_entry->cred = NULL;
 3402                         }
 3403 
 3404                         /*
 3405                          * Assert the correct state of the vnode
 3406                          * v_writecount while the object is locked, to
 3407                          * not relock it later for the assertion
 3408                          * correctness.
 3409                          */
 3410                         if (old_entry->eflags & MAP_ENTRY_VN_WRITECNT &&
 3411                             object->type == OBJT_VNODE) {
 3412                                 KASSERT(((struct vnode *)object->handle)->
 3413                                     v_writecount > 0,
 3414                                     ("vmspace_fork: v_writecount %p", object));
 3415                                 KASSERT(object->un_pager.vnp.writemappings > 0,
 3416                                     ("vmspace_fork: vnp.writecount %p",
 3417                                     object));
 3418                         }
 3419                         VM_OBJECT_WUNLOCK(object);
 3420 
 3421                         /*
 3422                          * Clone the entry, referencing the shared object.
 3423                          */
 3424                         new_entry = vm_map_entry_create(new_map);
 3425                         *new_entry = *old_entry;
 3426                         new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
 3427                             MAP_ENTRY_IN_TRANSITION);
 3428                         new_entry->wiring_thread = NULL;
 3429                         new_entry->wired_count = 0;
 3430                         if (new_entry->eflags & MAP_ENTRY_VN_WRITECNT) {
 3431                                 vnode_pager_update_writecount(object,
 3432                                     new_entry->start, new_entry->end);
 3433                         }
 3434 
 3435                         /*
 3436                          * Insert the entry into the new map -- we know we're
 3437                          * inserting at the end of the new map.
 3438                          */
 3439                         vm_map_entry_link(new_map, new_map->header.prev,
 3440                             new_entry);
 3441                         vmspace_map_entry_forked(vm1, vm2, new_entry);
 3442 
 3443                         /*
 3444                          * Update the physical map
 3445                          */
 3446                         pmap_copy(new_map->pmap, old_map->pmap,
 3447                             new_entry->start,
 3448                             (old_entry->end - old_entry->start),
 3449                             old_entry->start);
 3450                         break;
 3451 
 3452                 case VM_INHERIT_COPY:
 3453                         /*
 3454                          * Clone the entry and link into the map.
 3455                          */
 3456                         new_entry = vm_map_entry_create(new_map);
 3457                         *new_entry = *old_entry;
 3458                         /*
 3459                          * Copied entry is COW over the old object.
 3460                          */
 3461                         new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED |
 3462                             MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_VN_WRITECNT);
 3463                         new_entry->wiring_thread = NULL;
 3464                         new_entry->wired_count = 0;
 3465                         new_entry->object.vm_object = NULL;
 3466                         new_entry->cred = NULL;
 3467                         vm_map_entry_link(new_map, new_map->header.prev,
 3468                             new_entry);
 3469                         vmspace_map_entry_forked(vm1, vm2, new_entry);
 3470                         vm_map_copy_entry(old_map, new_map, old_entry,
 3471                             new_entry, fork_charge);
 3472                         break;
 3473 
 3474                 case VM_INHERIT_ZERO:
 3475                         /*
 3476                          * Create a new anonymous mapping entry modelled from
 3477                          * the old one.
 3478                          */
 3479                         new_entry = vm_map_entry_create(new_map);
 3480                         memset(new_entry, 0, sizeof(*new_entry));
 3481 
 3482                         new_entry->start = old_entry->start;
 3483                         new_entry->end = old_entry->end;
 3484                         new_entry->eflags = old_entry->eflags &
 3485                             ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION |
 3486                             MAP_ENTRY_VN_WRITECNT);
 3487                         new_entry->protection = old_entry->protection;
 3488                         new_entry->max_protection = old_entry->max_protection;
 3489                         new_entry->inheritance = VM_INHERIT_ZERO;
 3490 
 3491                         vm_map_entry_link(new_map, new_map->header.prev,
 3492                             new_entry);
 3493                         vmspace_map_entry_forked(vm1, vm2, new_entry);
 3494 
 3495                         new_entry->cred = curthread->td_ucred;
 3496                         crhold(new_entry->cred);
 3497                         *fork_charge += (new_entry->end - new_entry->start);
 3498 
 3499                         break;
 3500                 }
 3501                 old_entry = old_entry->next;
 3502         }
 3503         /*
 3504          * Use inlined vm_map_unlock() to postpone handling the deferred
 3505          * map entries, which cannot be done until both old_map and
 3506          * new_map locks are released.
 3507          */
 3508         sx_xunlock(&old_map->lock);
 3509         sx_xunlock(&new_map->lock);
 3510         vm_map_process_deferred();
 3511 
 3512         return (vm2);
 3513 }
 3514 
 3515 /*
 3516  * Create a process's stack for exec_new_vmspace().  This function is never
 3517  * asked to wire the newly created stack.
 3518  */
 3519 int
 3520 vm_map_stack(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
 3521     vm_prot_t prot, vm_prot_t max, int cow)
 3522 {
 3523         vm_size_t growsize, init_ssize;
 3524         rlim_t vmemlim;
 3525         int rv;
 3526 
 3527         MPASS((map->flags & MAP_WIREFUTURE) == 0);
 3528         growsize = sgrowsiz;
 3529         init_ssize = (max_ssize < growsize) ? max_ssize : growsize;
 3530         vm_map_lock(map);
 3531         PROC_LOCK(curproc);
 3532         vmemlim = lim_cur(curproc, RLIMIT_VMEM);
 3533         PROC_UNLOCK(curproc);
 3534         /* If we would blow our VMEM resource limit, no go */
 3535         if (map->size + init_ssize > vmemlim) {
 3536                 rv = KERN_NO_SPACE;
 3537                 goto out;
 3538         }
 3539         rv = vm_map_stack_locked(map, addrbos, max_ssize, growsize, prot,
 3540             max, cow);
 3541 out:
 3542         vm_map_unlock(map);
 3543         return (rv);
 3544 }
 3545 
 3546 static int stack_guard_page = 1;
 3547 SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN,
 3548     &stack_guard_page, 0,
 3549     "Specifies the number of guard pages for a stack that grows");
 3550 
 3551 static int
 3552 vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
 3553     vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow)
 3554 {
 3555         vm_map_entry_t new_entry, prev_entry;
 3556         vm_offset_t bot, gap_bot, gap_top, top;
 3557         vm_size_t init_ssize, sgp;
 3558         int orient, rv;
 3559 
 3560         /*
 3561          * The stack orientation is piggybacked with the cow argument.
 3562          * Extract it into orient and mask the cow argument so that we
 3563          * don't pass it around further.
 3564          */
 3565         orient = cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP);
 3566         KASSERT(orient != 0, ("No stack grow direction"));
 3567         KASSERT(orient != (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP),
 3568             ("bi-dir stack"));
 3569 
 3570         if (addrbos < vm_map_min(map) ||
 3571             addrbos + max_ssize > vm_map_max(map) ||
 3572             addrbos + max_ssize <= addrbos)
 3573                 return (KERN_INVALID_ADDRESS);
 3574         sgp = (vm_size_t)stack_guard_page * PAGE_SIZE;
 3575         if (sgp >= max_ssize)
 3576                 return (KERN_INVALID_ARGUMENT);
 3577 
 3578         init_ssize = growsize;
 3579         if (max_ssize < init_ssize + sgp)
 3580                 init_ssize = max_ssize - sgp;
 3581 
 3582         /* If addr is already mapped, no go */
 3583         if (vm_map_lookup_entry(map, addrbos, &prev_entry))
 3584                 return (KERN_NO_SPACE);
 3585 
 3586         /*
 3587          * If we can't accomodate max_ssize in the current mapping, no go.
 3588          */
 3589         if ((prev_entry->next != &map->header) &&
 3590             (prev_entry->next->start < addrbos + max_ssize))
 3591                 return (KERN_NO_SPACE);
 3592 
 3593         /*
 3594          * We initially map a stack of only init_ssize.  We will grow as
 3595          * needed later.  Depending on the orientation of the stack (i.e.
 3596          * the grow direction) we either map at the top of the range, the
 3597          * bottom of the range or in the middle.
 3598          *
 3599          * Note: we would normally expect prot and max to be VM_PROT_ALL,
 3600          * and cow to be 0.  Possibly we should eliminate these as input
 3601          * parameters, and just pass these values here in the insert call.
 3602          */
 3603         if (orient == MAP_STACK_GROWS_DOWN) {
 3604                 bot = addrbos + max_ssize - init_ssize;
 3605                 top = bot + init_ssize;
 3606                 gap_bot = addrbos;
 3607                 gap_top = bot;
 3608         } else /* if (orient == MAP_STACK_GROWS_UP) */ {
 3609                 bot = addrbos;
 3610                 top = bot + init_ssize;
 3611                 gap_bot = top;
 3612                 gap_top = addrbos + max_ssize;
 3613         }
 3614         rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
 3615         if (rv != KERN_SUCCESS)
 3616                 return (rv);
 3617         new_entry = prev_entry->next;
 3618         KASSERT(new_entry->end == top || new_entry->start == bot,
 3619             ("Bad entry start/end for new stack entry"));
 3620         KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 ||
 3621             (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0,
 3622             ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
 3623         KASSERT((orient & MAP_STACK_GROWS_UP) == 0 ||
 3624             (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0,
 3625             ("new entry lacks MAP_ENTRY_GROWS_UP"));
 3626         rv = vm_map_insert(map, NULL, 0, gap_bot, gap_top, VM_PROT_NONE,
 3627             VM_PROT_NONE, MAP_CREATE_GUARD | (orient == MAP_STACK_GROWS_DOWN ?
 3628             MAP_CREATE_STACK_GAP_DN : MAP_CREATE_STACK_GAP_UP));
 3629         if (rv != KERN_SUCCESS)
 3630                 (void)vm_map_delete(map, bot, top);
 3631         return (rv);
 3632 }
 3633 
 3634 /*
 3635  * Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if we
 3636  * successfully grow the stack.
 3637  */
 3638 static int
 3639 vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry)
 3640 {
 3641         vm_map_entry_t stack_entry;
 3642         struct proc *p;
 3643         struct vmspace *vm;
 3644         struct ucred *cred;
 3645         vm_offset_t gap_end, gap_start, grow_start;
 3646         size_t grow_amount, guard, max_grow;
 3647         rlim_t lmemlim, stacklim, vmemlim;
 3648         int rv, rv1;
 3649         bool gap_deleted, grow_down, is_procstack;
 3650 #ifdef notyet
 3651         uint64_t limit;
 3652 #endif
 3653 #ifdef RACCT
 3654         int error;
 3655 #endif
 3656 
 3657         p = curproc;
 3658         vm = p->p_vmspace;
 3659 
 3660         /*
 3661          * Disallow stack growth when the access is performed by a
 3662          * debugger or AIO daemon.  The reason is that the wrong
 3663          * resource limits are applied.
 3664          */
 3665         if (map != &p->p_vmspace->vm_map || p->p_textvp == NULL)
 3666                 return (KERN_FAILURE);
 3667 
 3668         MPASS(!map->system_map);
 3669 
 3670         guard = stack_guard_page * PAGE_SIZE;
 3671         PROC_LOCK(p);
 3672         lmemlim = lim_cur(p, RLIMIT_MEMLOCK);
 3673         stacklim = lim_cur(p, RLIMIT_STACK);
 3674         vmemlim = lim_cur(p, RLIMIT_VMEM);
 3675         PROC_UNLOCK(p);
 3676 retry:
 3677         /* If addr is not in a hole for a stack grow area, no need to grow. */
 3678         if (gap_entry == NULL && !vm_map_lookup_entry(map, addr, &gap_entry))
 3679                 return (KERN_FAILURE);
 3680         if ((gap_entry->eflags & MAP_ENTRY_GUARD) == 0)
 3681                 return (KERN_SUCCESS);
 3682         if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_DN) != 0) {
 3683                 stack_entry = gap_entry->next;
 3684                 if ((stack_entry->eflags & MAP_ENTRY_GROWS_DOWN) == 0 ||
 3685                     stack_entry->start != gap_entry->end)
 3686                         return (KERN_FAILURE);
 3687                 grow_amount = round_page(stack_entry->start - addr);
 3688                 grow_down = true;
 3689         } else if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_UP) != 0) {
 3690                 stack_entry = gap_entry->prev;
 3691                 if ((stack_entry->eflags & MAP_ENTRY_GROWS_UP) == 0 ||
 3692                     stack_entry->end != gap_entry->start)
 3693                         return (KERN_FAILURE);
 3694                 grow_amount = round_page(addr + 1 - stack_entry->end);
 3695                 grow_down = false;
 3696         } else {
 3697                 return (KERN_FAILURE);
 3698         }
 3699         max_grow = gap_entry->end - gap_entry->start;
 3700         if (guard > max_grow)
 3701                 return (KERN_NO_SPACE);
 3702         max_grow -= guard;
 3703         if (grow_amount > max_grow)
 3704                 return (KERN_NO_SPACE);
 3705 
 3706         /*
 3707          * If this is the main process stack, see if we're over the stack
 3708          * limit.
 3709          */
 3710         is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr &&
 3711             addr < (vm_offset_t)p->p_sysent->sv_usrstack;
 3712         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim))
 3713                 return (KERN_NO_SPACE);
 3714 
 3715 #ifdef RACCT
 3716         if (racct_enable) {
 3717                 PROC_LOCK(p);
 3718                 if (is_procstack && racct_set(p, RACCT_STACK,
 3719                     ctob(vm->vm_ssize) + grow_amount)) {
 3720                         PROC_UNLOCK(p);
 3721                         return (KERN_NO_SPACE);
 3722                 }
 3723                 PROC_UNLOCK(p);
 3724         }
 3725 #endif
 3726 
 3727         grow_amount = roundup(grow_amount, sgrowsiz);
 3728         if (grow_amount > max_grow)
 3729                 grow_amount = max_grow;
 3730         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
 3731                 grow_amount = trunc_page((vm_size_t)stacklim) -
 3732                     ctob(vm->vm_ssize);
 3733         }
 3734 
 3735 #ifdef notyet
 3736         PROC_LOCK(p);
 3737         limit = racct_get_available(p, RACCT_STACK);
 3738         PROC_UNLOCK(p);
 3739         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit))
 3740                 grow_amount = limit - ctob(vm->vm_ssize);
 3741 #endif
 3742 
 3743         if (!old_mlock && (map->flags & MAP_WIREFUTURE) != 0) {
 3744                 if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim) {
 3745                         rv = KERN_NO_SPACE;
 3746                         goto out;
 3747                 }
 3748 #ifdef RACCT
 3749                 if (racct_enable) {
 3750                         PROC_LOCK(p);
 3751                         if (racct_set(p, RACCT_MEMLOCK,
 3752                             ptoa(pmap_wired_count(map->pmap)) + grow_amount)) {
 3753                                 PROC_UNLOCK(p);
 3754                                 rv = KERN_NO_SPACE;
 3755                                 goto out;
 3756                         }
 3757                         PROC_UNLOCK(p);
 3758                 }
 3759 #endif
 3760         }
 3761 
 3762         /* If we would blow our VMEM resource limit, no go */
 3763         if (map->size + grow_amount > vmemlim) {
 3764                 rv = KERN_NO_SPACE;
 3765                 goto out;
 3766         }
 3767 #ifdef RACCT
 3768         if (racct_enable) {
 3769                 PROC_LOCK(p);
 3770                 if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) {
 3771                         PROC_UNLOCK(p);
 3772                         rv = KERN_NO_SPACE;
 3773                         goto out;
 3774                 }
 3775                 PROC_UNLOCK(p);
 3776         }
 3777 #endif
 3778 
 3779         if (vm_map_lock_upgrade(map)) {
 3780                 gap_entry = NULL;
 3781                 vm_map_lock_read(map);
 3782                 goto retry;
 3783         }
 3784 
 3785         if (grow_down) {
 3786                 grow_start = gap_entry->end - grow_amount;
 3787                 if (gap_entry->start + grow_amount == gap_entry->end) {
 3788                         gap_start = gap_entry->start;
 3789                         gap_end = gap_entry->end;
 3790                         vm_map_entry_delete(map, gap_entry);
 3791                         gap_deleted = true;
 3792                 } else {
 3793                         MPASS(gap_entry->start < gap_entry->end - grow_amount);
 3794                         gap_entry->end -= grow_amount;
 3795                         vm_map_entry_resize_free(map, gap_entry);
 3796                         gap_deleted = false;
 3797                 }
 3798                 rv = vm_map_insert(map, NULL, 0, grow_start,
 3799                     grow_start + grow_amount,
 3800                     stack_entry->protection, stack_entry->max_protection,
 3801                     MAP_STACK_GROWS_DOWN);
 3802                 if (rv != KERN_SUCCESS) {
 3803                         if (gap_deleted) {
 3804                                 rv1 = vm_map_insert(map, NULL, 0, gap_start,
 3805                                     gap_end, VM_PROT_NONE, VM_PROT_NONE,
 3806                                     MAP_CREATE_GUARD | MAP_CREATE_STACK_GAP_DN);
 3807                                 MPASS(rv1 == KERN_SUCCESS);
 3808                         } else {
 3809                                 gap_entry->end += grow_amount;
 3810                                 vm_map_entry_resize_free(map, gap_entry);
 3811                         }
 3812                 }
 3813         } else {
 3814                 grow_start = stack_entry->end;
 3815                 cred = stack_entry->cred;
 3816                 if (cred == NULL && stack_entry->object.vm_object != NULL)
 3817                         cred = stack_entry->object.vm_object->cred;
 3818                 if (cred != NULL && !swap_reserve_by_cred(grow_amount, cred))
 3819                         rv = KERN_NO_SPACE;
 3820                 /* Grow the underlying object if applicable. */
 3821                 else if (stack_entry->object.vm_object == NULL ||
 3822                     vm_object_coalesce(stack_entry->object.vm_object,
 3823                     stack_entry->offset,
 3824                     (vm_size_t)(stack_entry->end - stack_entry->start),
 3825                     (vm_size_t)grow_amount, cred != NULL)) {
 3826                         if (gap_entry->start + grow_amount == gap_entry->end)
 3827                                 vm_map_entry_delete(map, gap_entry);
 3828                         else
 3829                                 gap_entry->start += grow_amount;
 3830                         stack_entry->end += grow_amount;
 3831                         map->size += grow_amount;
 3832                         vm_map_entry_resize_free(map, stack_entry);
 3833                         rv = KERN_SUCCESS;
 3834                 } else
 3835                         rv = KERN_FAILURE;
 3836         }
 3837         if (rv == KERN_SUCCESS && is_procstack)
 3838                 vm->vm_ssize += btoc(grow_amount);
 3839 
 3840         /*
 3841          * Heed the MAP_WIREFUTURE flag if it was set for this process.
 3842          */
 3843         if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) {
 3844                 vm_map_unlock(map);
 3845                 vm_map_wire(map, grow_start, grow_start + grow_amount,
 3846                     (p->p_flag & P_SYSTEM)
 3847                     ? VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES
 3848                     : VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES);
 3849                 vm_map_lock_read(map);
 3850         } else
 3851                 vm_map_lock_downgrade(map);
 3852 
 3853 out:
 3854 #ifdef RACCT
 3855         if (racct_enable && rv != KERN_SUCCESS) {
 3856                 PROC_LOCK(p);
 3857                 error = racct_set(p, RACCT_VMEM, map->size);
 3858                 KASSERT(error == 0, ("decreasing RACCT_VMEM failed"));
 3859                 if (!old_mlock) {
 3860                         error = racct_set(p, RACCT_MEMLOCK,
 3861                             ptoa(pmap_wired_count(map->pmap)));
 3862                         KASSERT(error == 0, ("decreasing RACCT_MEMLOCK failed"));
 3863                 }
 3864                 error = racct_set(p, RACCT_STACK, ctob(vm->vm_ssize));
 3865                 KASSERT(error == 0, ("decreasing RACCT_STACK failed"));
 3866                 PROC_UNLOCK(p);
 3867         }
 3868 #endif
 3869 
 3870         return (rv);
 3871 }
 3872 
 3873 /*
 3874  * Unshare the specified VM space for exec.  If other processes are
 3875  * mapped to it, then create a new one.  The new vmspace is null.
 3876  */
 3877 int
 3878 vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser)
 3879 {
 3880         struct vmspace *oldvmspace = p->p_vmspace;
 3881         struct vmspace *newvmspace;
 3882 
 3883         KASSERT((curthread->td_pflags & TDP_EXECVMSPC) == 0,
 3884             ("vmspace_exec recursed"));
 3885         newvmspace = vmspace_alloc(minuser, maxuser, NULL);
 3886         if (newvmspace == NULL)
 3887                 return (ENOMEM);
 3888         newvmspace->vm_swrss = oldvmspace->vm_swrss;
 3889         /*
 3890          * This code is written like this for prototype purposes.  The
 3891          * goal is to avoid running down the vmspace here, but let the
 3892          * other process's that are still using the vmspace to finally
 3893          * run it down.  Even though there is little or no chance of blocking
 3894          * here, it is a good idea to keep this form for future mods.
 3895          */
 3896         PROC_VMSPACE_LOCK(p);
 3897         p->p_vmspace = newvmspace;
 3898         PROC_VMSPACE_UNLOCK(p);
 3899         if (p == curthread->td_proc)
 3900                 pmap_activate(curthread);
 3901         curthread->td_pflags |= TDP_EXECVMSPC;
 3902         return (0);
 3903 }
 3904 
 3905 /*
 3906  * Unshare the specified VM space for forcing COW.  This
 3907  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
 3908  */
 3909 int
 3910 vmspace_unshare(struct proc *p)
 3911 {
 3912         struct vmspace *oldvmspace = p->p_vmspace;
 3913         struct vmspace *newvmspace;
 3914         vm_ooffset_t fork_charge;
 3915 
 3916         if (oldvmspace->vm_refcnt == 1)
 3917                 return (0);
 3918         fork_charge = 0;
 3919         newvmspace = vmspace_fork(oldvmspace, &fork_charge);
 3920         if (newvmspace == NULL)
 3921                 return (ENOMEM);
 3922         if (!swap_reserve_by_cred(fork_charge, p->p_ucred)) {
 3923                 vmspace_free(newvmspace);
 3924                 return (ENOMEM);
 3925         }
 3926         PROC_VMSPACE_LOCK(p);
 3927         p->p_vmspace = newvmspace;
 3928         PROC_VMSPACE_UNLOCK(p);
 3929         if (p == curthread->td_proc)
 3930                 pmap_activate(curthread);
 3931         vmspace_free(oldvmspace);
 3932         return (0);
 3933 }
 3934 
 3935 /*
 3936  *      vm_map_lookup:
 3937  *
 3938  *      Finds the VM object, offset, and
 3939  *      protection for a given virtual address in the
 3940  *      specified map, assuming a page fault of the
 3941  *      type specified.
 3942  *
 3943  *      Leaves the map in question locked for read; return
 3944  *      values are guaranteed until a vm_map_lookup_done
 3945  *      call is performed.  Note that the map argument
 3946  *      is in/out; the returned map must be used in
 3947  *      the call to vm_map_lookup_done.
 3948  *
 3949  *      A handle (out_entry) is returned for use in
 3950  *      vm_map_lookup_done, to make that fast.
 3951  *
 3952  *      If a lookup is requested with "write protection"
 3953  *      specified, the map may be changed to perform virtual
 3954  *      copying operations, although the data referenced will
 3955  *      remain the same.
 3956  */
 3957 int
 3958 vm_map_lookup(vm_map_t *var_map,                /* IN/OUT */
 3959               vm_offset_t vaddr,
 3960               vm_prot_t fault_typea,
 3961               vm_map_entry_t *out_entry,        /* OUT */
 3962               vm_object_t *object,              /* OUT */
 3963               vm_pindex_t *pindex,              /* OUT */
 3964               vm_prot_t *out_prot,              /* OUT */
 3965               boolean_t *wired)                 /* OUT */
 3966 {
 3967         vm_map_entry_t entry;
 3968         vm_map_t map = *var_map;
 3969         vm_prot_t prot;
 3970         vm_prot_t fault_type = fault_typea;
 3971         vm_object_t eobject;
 3972         vm_size_t size;
 3973         struct ucred *cred;
 3974 
 3975 RetryLookup:
 3976 
 3977         vm_map_lock_read(map);
 3978 
 3979 RetryLookupLocked:
 3980         /*
 3981          * Lookup the faulting address.
 3982          */
 3983         if (!vm_map_lookup_entry(map, vaddr, out_entry)) {
 3984                 vm_map_unlock_read(map);
 3985                 return (KERN_INVALID_ADDRESS);
 3986         }
 3987 
 3988         entry = *out_entry;
 3989 
 3990         /*
 3991          * Handle submaps.
 3992          */
 3993         if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
 3994                 vm_map_t old_map = map;
 3995 
 3996                 *var_map = map = entry->object.sub_map;
 3997                 vm_map_unlock_read(old_map);
 3998                 goto RetryLookup;
 3999         }
 4000 
 4001         /*
 4002          * Check whether this task is allowed to have this page.
 4003          */
 4004         prot = entry->protection;
 4005         if ((fault_typea & VM_PROT_FAULT_LOOKUP) != 0) {
 4006                 fault_typea &= ~VM_PROT_FAULT_LOOKUP;
 4007                 if (prot == VM_PROT_NONE && map != kernel_map &&
 4008                     (entry->eflags & MAP_ENTRY_GUARD) != 0 &&
 4009                     (entry->eflags & (MAP_ENTRY_STACK_GAP_DN |
 4010                     MAP_ENTRY_STACK_GAP_UP)) != 0 &&
 4011                     vm_map_growstack(map, vaddr, entry) == KERN_SUCCESS)
 4012                         goto RetryLookupLocked;
 4013         }
 4014         fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
 4015         if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) {
 4016                 vm_map_unlock_read(map);
 4017                 return (KERN_PROTECTION_FAILURE);
 4018         }
 4019         KASSERT((prot & VM_PROT_WRITE) == 0 || (entry->eflags &
 4020             (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY)) !=
 4021             (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY),
 4022             ("entry %p flags %x", entry, entry->eflags));
 4023         if ((fault_typea & VM_PROT_COPY) != 0 &&
 4024             (entry->max_protection & VM_PROT_WRITE) == 0 &&
 4025             (entry->eflags & MAP_ENTRY_COW) == 0) {
 4026                 vm_map_unlock_read(map);
 4027                 return (KERN_PROTECTION_FAILURE);
 4028         }
 4029 
 4030         /*
 4031          * If this page is not pageable, we have to get it for all possible
 4032          * accesses.
 4033          */
 4034         *wired = (entry->wired_count != 0);
 4035         if (*wired)
 4036                 fault_type = entry->protection;
 4037         size = entry->end - entry->start;
 4038         /*
 4039          * If the entry was copy-on-write, we either ...
 4040          */
 4041         if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
 4042                 /*
 4043                  * If we want to write the page, we may as well handle that
 4044                  * now since we've got the map locked.
 4045                  *
 4046                  * If we don't need to write the page, we just demote the
 4047                  * permissions allowed.
 4048                  */
 4049                 if ((fault_type & VM_PROT_WRITE) != 0 ||
 4050                     (fault_typea & VM_PROT_COPY) != 0) {
 4051                         /*
 4052                          * Make a new object, and place it in the object
 4053                          * chain.  Note that no new references have appeared
 4054                          * -- one just moved from the map to the new
 4055                          * object.
 4056                          */
 4057                         if (vm_map_lock_upgrade(map))
 4058                                 goto RetryLookup;
 4059 
 4060                         if (entry->cred == NULL) {
 4061                                 /*
 4062                                  * The debugger owner is charged for
 4063                                  * the memory.
 4064                                  */
 4065                                 cred = curthread->td_ucred;
 4066                                 crhold(cred);
 4067                                 if (!swap_reserve_by_cred(size, cred)) {
 4068                                         crfree(cred);
 4069                                         vm_map_unlock(map);
 4070                                         return (KERN_RESOURCE_SHORTAGE);
 4071                                 }
 4072                                 entry->cred = cred;
 4073                         }
 4074                         vm_object_shadow(&entry->object.vm_object,
 4075                             &entry->offset, size);
 4076                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 4077                         eobject = entry->object.vm_object;
 4078                         if (eobject->cred != NULL) {
 4079                                 /*
 4080                                  * The object was not shadowed.
 4081                                  */
 4082                                 swap_release_by_cred(size, entry->cred);
 4083                                 crfree(entry->cred);
 4084                                 entry->cred = NULL;
 4085                         } else if (entry->cred != NULL) {
 4086                                 VM_OBJECT_WLOCK(eobject);
 4087                                 eobject->cred = entry->cred;
 4088                                 eobject->charge = size;
 4089                                 VM_OBJECT_WUNLOCK(eobject);
 4090                                 entry->cred = NULL;
 4091                         }
 4092 
 4093                         vm_map_lock_downgrade(map);
 4094                 } else {
 4095                         /*
 4096                          * We're attempting to read a copy-on-write page --
 4097                          * don't allow writes.
 4098                          */
 4099                         prot &= ~VM_PROT_WRITE;
 4100                 }
 4101         }
 4102 
 4103         /*
 4104          * Create an object if necessary.
 4105          */
 4106         if (entry->object.vm_object == NULL &&
 4107             !map->system_map) {
 4108                 if (vm_map_lock_upgrade(map))
 4109                         goto RetryLookup;
 4110                 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
 4111                     atop(size));
 4112                 entry->offset = 0;
 4113                 if (entry->cred != NULL) {
 4114                         VM_OBJECT_WLOCK(entry->object.vm_object);
 4115                         entry->object.vm_object->cred = entry->cred;
 4116                         entry->object.vm_object->charge = size;
 4117                         VM_OBJECT_WUNLOCK(entry->object.vm_object);
 4118                         entry->cred = NULL;
 4119                 }
 4120                 vm_map_lock_downgrade(map);
 4121         }
 4122 
 4123         /*
 4124          * Return the object/offset from this entry.  If the entry was
 4125          * copy-on-write or empty, it has been fixed up.
 4126          */
 4127         *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
 4128         *object = entry->object.vm_object;
 4129 
 4130         *out_prot = prot;
 4131         return (KERN_SUCCESS);
 4132 }
 4133 
 4134 /*
 4135  *      vm_map_lookup_locked:
 4136  *
 4137  *      Lookup the faulting address.  A version of vm_map_lookup that returns 
 4138  *      KERN_FAILURE instead of blocking on map lock or memory allocation.
 4139  */
 4140 int
 4141 vm_map_lookup_locked(vm_map_t *var_map,         /* IN/OUT */
 4142                      vm_offset_t vaddr,
 4143                      vm_prot_t fault_typea,
 4144                      vm_map_entry_t *out_entry, /* OUT */
 4145                      vm_object_t *object,       /* OUT */
 4146                      vm_pindex_t *pindex,       /* OUT */
 4147                      vm_prot_t *out_prot,       /* OUT */
 4148                      boolean_t *wired)          /* OUT */
 4149 {
 4150         vm_map_entry_t entry;
 4151         vm_map_t map = *var_map;
 4152         vm_prot_t prot;
 4153         vm_prot_t fault_type = fault_typea;
 4154 
 4155         /*
 4156          * Lookup the faulting address.
 4157          */
 4158         if (!vm_map_lookup_entry(map, vaddr, out_entry))
 4159                 return (KERN_INVALID_ADDRESS);
 4160 
 4161         entry = *out_entry;
 4162 
 4163         /*
 4164          * Fail if the entry refers to a submap.
 4165          */
 4166         if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 4167                 return (KERN_FAILURE);
 4168 
 4169         /*
 4170          * Check whether this task is allowed to have this page.
 4171          */
 4172         prot = entry->protection;
 4173         fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
 4174         if ((fault_type & prot) != fault_type)
 4175                 return (KERN_PROTECTION_FAILURE);
 4176 
 4177         /*
 4178          * If this page is not pageable, we have to get it for all possible
 4179          * accesses.
 4180          */
 4181         *wired = (entry->wired_count != 0);
 4182         if (*wired)
 4183                 fault_type = entry->protection;
 4184 
 4185         if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
 4186                 /*
 4187                  * Fail if the entry was copy-on-write for a write fault.
 4188                  */
 4189                 if (fault_type & VM_PROT_WRITE)
 4190                         return (KERN_FAILURE);
 4191                 /*
 4192                  * We're attempting to read a copy-on-write page --
 4193                  * don't allow writes.
 4194                  */
 4195                 prot &= ~VM_PROT_WRITE;
 4196         }
 4197 
 4198         /*
 4199          * Fail if an object should be created.
 4200          */
 4201         if (entry->object.vm_object == NULL && !map->system_map)
 4202                 return (KERN_FAILURE);
 4203 
 4204         /*
 4205          * Return the object/offset from this entry.  If the entry was
 4206          * copy-on-write or empty, it has been fixed up.
 4207          */
 4208         *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
 4209         *object = entry->object.vm_object;
 4210 
 4211         *out_prot = prot;
 4212         return (KERN_SUCCESS);
 4213 }
 4214 
 4215 /*
 4216  *      vm_map_lookup_done:
 4217  *
 4218  *      Releases locks acquired by a vm_map_lookup
 4219  *      (according to the handle returned by that lookup).
 4220  */
 4221 void
 4222 vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry)
 4223 {
 4224         /*
 4225          * Unlock the main-level map
 4226          */
 4227         vm_map_unlock_read(map);
 4228 }
 4229 
 4230 #include "opt_ddb.h"
 4231 #ifdef DDB
 4232 #include <sys/kernel.h>
 4233 
 4234 #include <ddb/ddb.h>
 4235 
 4236 static void
 4237 vm_map_print(vm_map_t map)
 4238 {
 4239         vm_map_entry_t entry;
 4240 
 4241         db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
 4242             (void *)map,
 4243             (void *)map->pmap, map->nentries, map->timestamp);
 4244 
 4245         db_indent += 2;
 4246         for (entry = map->header.next; entry != &map->header;
 4247             entry = entry->next) {
 4248                 db_iprintf("map entry %p: start=%p, end=%p, eflags=%#x, \n",
 4249                     (void *)entry, (void *)entry->start, (void *)entry->end,
 4250                     entry->eflags);
 4251                 {
 4252                         static char *inheritance_name[4] =
 4253                         {"share", "copy", "none", "donate_copy"};
 4254 
 4255                         db_iprintf(" prot=%x/%x/%s",
 4256                             entry->protection,
 4257                             entry->max_protection,
 4258                             inheritance_name[(int)(unsigned char)entry->inheritance]);
 4259                         if (entry->wired_count != 0)
 4260                                 db_printf(", wired");
 4261                 }
 4262                 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
 4263                         db_printf(", share=%p, offset=0x%jx\n",
 4264                             (void *)entry->object.sub_map,
 4265                             (uintmax_t)entry->offset);
 4266                         if ((entry->prev == &map->header) ||
 4267                             (entry->prev->object.sub_map !=
 4268                                 entry->object.sub_map)) {
 4269                                 db_indent += 2;
 4270                                 vm_map_print((vm_map_t)entry->object.sub_map);
 4271                                 db_indent -= 2;
 4272                         }
 4273                 } else {
 4274                         if (entry->cred != NULL)
 4275                                 db_printf(", ruid %d", entry->cred->cr_ruid);
 4276                         db_printf(", object=%p, offset=0x%jx",
 4277                             (void *)entry->object.vm_object,
 4278                             (uintmax_t)entry->offset);
 4279                         if (entry->object.vm_object && entry->object.vm_object->cred)
 4280                                 db_printf(", obj ruid %d charge %jx",
 4281                                     entry->object.vm_object->cred->cr_ruid,
 4282                                     (uintmax_t)entry->object.vm_object->charge);
 4283                         if (entry->eflags & MAP_ENTRY_COW)
 4284                                 db_printf(", copy (%s)",
 4285                                     (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
 4286                         db_printf("\n");
 4287 
 4288                         if ((entry->prev == &map->header) ||
 4289                             (entry->prev->object.vm_object !=
 4290                                 entry->object.vm_object)) {
 4291                                 db_indent += 2;
 4292                                 vm_object_print((db_expr_t)(intptr_t)
 4293                                                 entry->object.vm_object,
 4294                                                 0, 0, (char *)0);
 4295                                 db_indent -= 2;
 4296                         }
 4297                 }
 4298         }
 4299         db_indent -= 2;
 4300 }
 4301 
 4302 DB_SHOW_COMMAND(map, map)
 4303 {
 4304 
 4305         if (!have_addr) {
 4306                 db_printf("usage: show map <addr>\n");
 4307                 return;
 4308         }
 4309         vm_map_print((vm_map_t)addr);
 4310 }
 4311 
 4312 DB_SHOW_COMMAND(procvm, procvm)
 4313 {
 4314         struct proc *p;
 4315 
 4316         if (have_addr) {
 4317                 p = db_lookup_proc(addr);
 4318         } else {
 4319                 p = curproc;
 4320         }
 4321 
 4322         db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
 4323             (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
 4324             (void *)vmspace_pmap(p->p_vmspace));
 4325 
 4326         vm_map_print((vm_map_t)&p->p_vmspace->vm_map);
 4327 }
 4328 
 4329 #endif /* DDB */

Cache object: c00dc60ab4185bcfb60dd218218a2fc4


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.