FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_map.c


    1 /*      $OpenBSD: uvm_map.c,v 1.309 2023/01/31 15:18:55 deraadt Exp $   */
    2 /*      $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */
    3 
    4 /*
    5  * Copyright (c) 2011 Ariane van der Steldt <ariane@openbsd.org>
    6  *
    7  * Permission to use, copy, modify, and distribute this software for any
    8  * purpose with or without fee is hereby granted, provided that the above
    9  * copyright notice and this permission notice appear in all copies.
   10  *
   11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
   12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
   13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
   14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   18  *
   19  *
   20  * Copyright (c) 1997 Charles D. Cranor and Washington University.
   21  * Copyright (c) 1991, 1993, The Regents of the University of California.
   22  *
   23  * All rights reserved.
   24  *
   25  * This code is derived from software contributed to Berkeley by
   26  * The Mach Operating System project at Carnegie-Mellon University.
   27  *
   28  * Redistribution and use in source and binary forms, with or without
   29  * modification, are permitted provided that the following conditions
   30  * are met:
   31  * 1. Redistributions of source code must retain the above copyright
   32  *    notice, this list of conditions and the following disclaimer.
   33  * 2. Redistributions in binary form must reproduce the above copyright
   34  *    notice, this list of conditions and the following disclaimer in the
   35  *    documentation and/or other materials provided with the distribution.
   36  * 3. Neither the name of the University nor the names of its contributors
   37  *    may be used to endorse or promote products derived from this software
   38  *    without specific prior written permission.
   39  *
   40  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   50  * SUCH DAMAGE.
   51  *
   52  *      @(#)vm_map.c    8.3 (Berkeley) 1/12/94
   53  * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
   54  *
   55  *
   56  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   57  * All rights reserved.
   58  *
   59  * Permission to use, copy, modify and distribute this software and
   60  * its documentation is hereby granted, provided that both the copyright
   61  * notice and this permission notice appear in all copies of the
   62  * software, derivative works or modified versions, and any portions
   63  * thereof, and that both notices appear in supporting documentation.
   64  *
   65  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   66  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   67  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   68  *
   69  * Carnegie Mellon requests users of this software to return to
   70  *
   71  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   72  *  School of Computer Science
   73  *  Carnegie Mellon University
   74  *  Pittsburgh PA 15213-3890
   75  *
   76  * any improvements or extensions that they make and grant Carnegie the
   77  * rights to redistribute these changes.
   78  */
   79 
   80 /*
   81  * uvm_map.c: uvm map operations
   82  */
   83 
   84 /* #define DEBUG */
   85 /* #define VMMAP_DEBUG */
   86 
   87 #include <sys/param.h>
   88 #include <sys/systm.h>
   89 #include <sys/acct.h>
   90 #include <sys/mman.h>
   91 #include <sys/proc.h>
   92 #include <sys/malloc.h>
   93 #include <sys/pool.h>
   94 #include <sys/sysctl.h>
   95 #include <sys/signalvar.h>
   96 #include <sys/syslog.h>
   97 #include <sys/user.h>
   98 #include <sys/tracepoint.h>
   99 
  100 #ifdef SYSVSHM
  101 #include <sys/shm.h>
  102 #endif
  103 
  104 #include <uvm/uvm.h>
  105 
  106 #ifdef DDB
  107 #include <uvm/uvm_ddb.h>
  108 #endif
  109 
  110 #include <uvm/uvm_addr.h>
  111 
  112 
  113 vsize_t                  uvmspace_dused(struct vm_map*, vaddr_t, vaddr_t);
  114 int                      uvm_mapent_isjoinable(struct vm_map*,
  115                             struct vm_map_entry*, struct vm_map_entry*);
  116 struct vm_map_entry     *uvm_mapent_merge(struct vm_map*, struct vm_map_entry*,
  117                             struct vm_map_entry*, struct uvm_map_deadq*);
  118 struct vm_map_entry     *uvm_mapent_tryjoin(struct vm_map*,
  119                             struct vm_map_entry*, struct uvm_map_deadq*);
  120 struct vm_map_entry     *uvm_map_mkentry(struct vm_map*, struct vm_map_entry*,
  121                             struct vm_map_entry*, vaddr_t, vsize_t, int,
  122                             struct uvm_map_deadq*, struct vm_map_entry*);
  123 struct vm_map_entry     *uvm_mapent_alloc(struct vm_map*, int);
  124 void                     uvm_mapent_free(struct vm_map_entry*);
  125 void                     uvm_unmap_kill_entry(struct vm_map*,
  126                             struct vm_map_entry*);
  127 void                     uvm_unmap_kill_entry_withlock(struct vm_map *,
  128                             struct vm_map_entry *, int);
  129 void                     uvm_unmap_detach_intrsafe(struct uvm_map_deadq *);
  130 void                     uvm_mapent_mkfree(struct vm_map*,
  131                             struct vm_map_entry*, struct vm_map_entry**,
  132                             struct uvm_map_deadq*, boolean_t);
  133 void                     uvm_map_pageable_pgon(struct vm_map*,
  134                             struct vm_map_entry*, struct vm_map_entry*,
  135                             vaddr_t, vaddr_t);
  136 int                      uvm_map_pageable_wire(struct vm_map*,
  137                             struct vm_map_entry*, struct vm_map_entry*,
  138                             vaddr_t, vaddr_t, int);
  139 void                     uvm_map_setup_entries(struct vm_map*);
  140 void                     uvm_map_setup_md(struct vm_map*);
  141 void                     uvm_map_teardown(struct vm_map*);
  142 void                     uvm_map_vmspace_update(struct vm_map*,
  143                             struct uvm_map_deadq*, int);
  144 void                     uvm_map_kmem_grow(struct vm_map*,
  145                             struct uvm_map_deadq*, vsize_t, int);
  146 void                     uvm_map_freelist_update_clear(struct vm_map*,
  147                             struct uvm_map_deadq*);
  148 void                     uvm_map_freelist_update_refill(struct vm_map *, int);
  149 void                     uvm_map_freelist_update(struct vm_map*,
  150                             struct uvm_map_deadq*, vaddr_t, vaddr_t,
  151                             vaddr_t, vaddr_t, int);
  152 struct vm_map_entry     *uvm_map_fix_space(struct vm_map*, struct vm_map_entry*,
  153                             vaddr_t, vaddr_t, int);
  154 int                      uvm_map_findspace(struct vm_map*,
  155                             struct vm_map_entry**, struct vm_map_entry**,
  156                             vaddr_t*, vsize_t, vaddr_t, vaddr_t, vm_prot_t,
  157                             vaddr_t);
  158 vsize_t                  uvm_map_addr_augment_get(struct vm_map_entry*);
  159 void                     uvm_map_addr_augment(struct vm_map_entry*);
  160 
  161 int                      uvm_map_inentry_recheck(u_long, vaddr_t,
  162                              struct p_inentry *);
  163 boolean_t                uvm_map_inentry_fix(struct proc *, struct p_inentry *,
  164                              vaddr_t, int (*)(vm_map_entry_t), u_long);
  165 /*
  166  * Tree management functions.
  167  */
  168 
  169 static inline void       uvm_mapent_copy(struct vm_map_entry*,
  170                             struct vm_map_entry*);
  171 static inline int        uvm_mapentry_addrcmp(const struct vm_map_entry*,
  172                             const struct vm_map_entry*);
  173 void                     uvm_mapent_free_insert(struct vm_map*,
  174                             struct uvm_addr_state*, struct vm_map_entry*);
  175 void                     uvm_mapent_free_remove(struct vm_map*,
  176                             struct uvm_addr_state*, struct vm_map_entry*);
  177 void                     uvm_mapent_addr_insert(struct vm_map*,
  178                             struct vm_map_entry*);
  179 void                     uvm_mapent_addr_remove(struct vm_map*,
  180                             struct vm_map_entry*);
  181 void                     uvm_map_splitentry(struct vm_map*,
  182                             struct vm_map_entry*, struct vm_map_entry*,
  183                             vaddr_t);
  184 vsize_t                  uvm_map_boundary(struct vm_map*, vaddr_t, vaddr_t);
  185 
  186 /*
  187  * uvm_vmspace_fork helper functions.
  188  */
  189 struct vm_map_entry     *uvm_mapent_clone(struct vm_map*, vaddr_t, vsize_t,
  190                             vsize_t, vm_prot_t, vm_prot_t,
  191                             struct vm_map_entry*, struct uvm_map_deadq*, int,
  192                             int);
  193 struct vm_map_entry     *uvm_mapent_share(struct vm_map*, vaddr_t, vsize_t,
  194                             vsize_t, vm_prot_t, vm_prot_t, struct vm_map*,
  195                             struct vm_map_entry*, struct uvm_map_deadq*);
  196 struct vm_map_entry     *uvm_mapent_forkshared(struct vmspace*, struct vm_map*,
  197                             struct vm_map*, struct vm_map_entry*,
  198                             struct uvm_map_deadq*);
  199 struct vm_map_entry     *uvm_mapent_forkcopy(struct vmspace*, struct vm_map*,
  200                             struct vm_map*, struct vm_map_entry*,
  201                             struct uvm_map_deadq*);
  202 struct vm_map_entry     *uvm_mapent_forkzero(struct vmspace*, struct vm_map*,
  203                             struct vm_map*, struct vm_map_entry*,
  204                             struct uvm_map_deadq*);
  205 
  206 /*
  207  * Tree validation.
  208  */
  209 #ifdef VMMAP_DEBUG
  210 void                     uvm_tree_assert(struct vm_map*, int, char*,
  211                             char*, int);
  212 #define UVM_ASSERT(map, cond, file, line)                               \
  213         uvm_tree_assert((map), (cond), #cond, (file), (line))
  214 void                     uvm_tree_sanity(struct vm_map*, char*, int);
  215 void                     uvm_tree_size_chk(struct vm_map*, char*, int);
  216 void                     vmspace_validate(struct vm_map*);
  217 #else
  218 #define uvm_tree_sanity(_map, _file, _line)             do {} while (0)
  219 #define uvm_tree_size_chk(_map, _file, _line)           do {} while (0)
  220 #define vmspace_validate(_map)                          do {} while (0)
  221 #endif
  222 
  223 /*
  224  * The kernel map will initially be VM_MAP_KSIZE_INIT bytes.
  225  * Every time that gets cramped, we grow by at least VM_MAP_KSIZE_DELTA bytes.
  226  *
  227  * We attempt to grow by VM_MAP_KSIZE_ALLOCMUL times the allocation size
  228  * each time.
  229  */
  230 #define VM_MAP_KSIZE_INIT       (512 * (vaddr_t)PAGE_SIZE)
  231 #define VM_MAP_KSIZE_DELTA      (256 * (vaddr_t)PAGE_SIZE)
  232 #define VM_MAP_KSIZE_ALLOCMUL   4
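
/*
 * Editor's note (not part of the original source): a worked example of the
 * constants above, assuming a 4 KiB PAGE_SIZE:
 *
 *	VM_MAP_KSIZE_INIT  = 512 * 4096 = 2 MiB   (initial kernel map size)
 *	VM_MAP_KSIZE_DELTA = 256 * 4096 = 1 MiB   (minimum growth step)
 *
 * Per the comment above, a kernel allocation of sz bytes is expected to grow
 * the map by roughly MAX(VM_MAP_KSIZE_DELTA, VM_MAP_KSIZE_ALLOCMUL * sz);
 * see uvm_map_kmem_grow() for the authoritative logic.
 */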
  233 
  234 /* auto-allocate address lower bound */
  235 #define VMMAP_MIN_ADDR          PAGE_SIZE
  236 
  237 
  238 #ifdef DEADBEEF0
  239 #define UVMMAP_DEADBEEF         ((unsigned long)DEADBEEF0)
  240 #else
  241 #define UVMMAP_DEADBEEF         ((unsigned long)0xdeadd0d0)
  242 #endif
  243 
  244 #ifdef DEBUG
  245 int uvm_map_printlocks = 0;
  246 
  247 #define LPRINTF(_args)                                                  \
  248         do {                                                            \
  249                 if (uvm_map_printlocks)                                 \
  250                         printf _args;                                   \
  251         } while (0)
  252 #else
  253 #define LPRINTF(_args)  do {} while (0)
  254 #endif
  255 
  256 static struct mutex uvm_kmapent_mtx;
  257 static struct timeval uvm_kmapent_last_warn_time;
  258 static struct timeval uvm_kmapent_warn_rate = { 10, 0 };
  259 
  260 const char vmmapbsy[] = "vmmapbsy";
  261 
  262 /*
  263  * pool for vmspace structures.
  264  */
  265 struct pool uvm_vmspace_pool;
  266 
  267 /*
  268  * pool for dynamically-allocated map entries.
  269  */
  270 struct pool uvm_map_entry_pool;
  271 struct pool uvm_map_entry_kmem_pool;
  272 
  273 /*
  274  * This global represents the end of the kernel virtual address
  275  * space. If we want to exceed this, we must grow the kernel
  276  * virtual address space dynamically.
  277  *
  278  * Note, this variable is locked by kernel_map's lock.
  279  */
  280 vaddr_t uvm_maxkaddr;
  281 
  282 /*
  283  * Locking predicate.
  284  */
  285 #define UVM_MAP_REQ_WRITE(_map)                                         \
  286         do {                                                            \
  287                 if ((_map)->ref_count > 0) {                            \
  288                         if (((_map)->flags & VM_MAP_INTRSAFE) == 0)     \
  289                                 rw_assert_wrlock(&(_map)->lock);        \
  290                         else                                            \
  291                                 MUTEX_ASSERT_LOCKED(&(_map)->mtx);      \
  292                 }                                                       \
  293         } while (0)
  294 
  295 #define vm_map_modflags(map, set, clear)                                \
  296         do {                                                            \
  297                 mtx_enter(&(map)->flags_lock);                          \
  298                 (map)->flags = ((map)->flags | (set)) & ~(clear);       \
  299                 mtx_leave(&(map)->flags_lock);                          \
  300         } while (0)
  301 
  302 
  303 /*
  304  * Tree describing entries by address.
  305  *
  306  * Addresses are unique.
  307  * Entries with start == end may only exist if they are the first entry
  308  * (sorted by address) within a free-memory tree.
  309  */
  310 
  311 static inline int
  312 uvm_mapentry_addrcmp(const struct vm_map_entry *e1,
  313     const struct vm_map_entry *e2)
  314 {
  315         return e1->start < e2->start ? -1 : e1->start > e2->start;
  316 }
  317 
  318 /*
  319  * Copy mapentry.
  320  */
  321 static inline void
  322 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
  323 {
  324         caddr_t csrc, cdst;
  325         size_t sz;
  326 
  327         csrc = (caddr_t)src;
  328         cdst = (caddr_t)dst;
  329         csrc += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
  330         cdst += offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
  331 
  332         sz = offsetof(struct vm_map_entry, uvm_map_entry_stop_copy) -
  333             offsetof(struct vm_map_entry, uvm_map_entry_start_copy);
  334         memcpy(cdst, csrc, sz);
  335 }
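
/*
 * Editor's illustrative sketch (not part of the original source): the
 * start_copy/stop_copy marker technique used above, demonstrated on a
 * hypothetical structure.  All names below are invented for illustration;
 * only the offsetof()/memcpy() pattern mirrors uvm_mapent_copy().
 * Outside the kernel this would need <stddef.h> and <string.h>.
 *
 *	struct demo {
 *		int	linkage;		(not copied, e.g. tree pointers)
 *		int	demo_start_copy;	(first copied field)
 *		int	a, b, c;
 *		int	demo_stop_copy;		(first field past the copy)
 *	};
 *
 *	void
 *	demo_copy(const struct demo *src, struct demo *dst)
 *	{
 *		size_t off = offsetof(struct demo, demo_start_copy);
 *		size_t sz = offsetof(struct demo, demo_stop_copy) - off;
 *
 *		memcpy((char *)dst + off, (const char *)src + off, sz);
 *	}
 */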
  336 
  337 /*
  338  * Handle free-list insertion.
  339  */
  340 void
  341 uvm_mapent_free_insert(struct vm_map *map, struct uvm_addr_state *uaddr,
  342     struct vm_map_entry *entry)
  343 {
  344         const struct uvm_addr_functions *fun;
  345 #ifdef VMMAP_DEBUG
  346         vaddr_t min, max, bound;
  347 #endif
  348 
  349 #ifdef VMMAP_DEBUG
  350         /*
  351          * Boundary check.
  352          * Boundaries are folded if they go on the same free list.
  353          */
  354         min = VMMAP_FREE_START(entry);
  355         max = VMMAP_FREE_END(entry);
  356 
  357         while (min < max) {
  358                 bound = uvm_map_boundary(map, min, max);
  359                 KASSERT(uvm_map_uaddr(map, min) == uaddr);
  360                 min = bound;
  361         }
  362 #endif
  363         KDASSERT((entry->fspace & (vaddr_t)PAGE_MASK) == 0);
  364         KASSERT((entry->etype & UVM_ET_FREEMAPPED) == 0);
  365 
  366         UVM_MAP_REQ_WRITE(map);
  367 
  368         /* Actual insert: forward to uaddr pointer. */
  369         if (uaddr != NULL) {
  370                 fun = uaddr->uaddr_functions;
  371                 KDASSERT(fun != NULL);
  372                 if (fun->uaddr_free_insert != NULL)
  373                         (*fun->uaddr_free_insert)(map, uaddr, entry);
  374                 entry->etype |= UVM_ET_FREEMAPPED;
  375         }
  376 
  377         /* Update fspace augmentation. */
  378         uvm_map_addr_augment(entry);
  379 }
  380 
  381 /*
  382  * Handle free-list removal.
  383  */
  384 void
  385 uvm_mapent_free_remove(struct vm_map *map, struct uvm_addr_state *uaddr,
  386     struct vm_map_entry *entry)
  387 {
  388         const struct uvm_addr_functions *fun;
  389 
  390         KASSERT((entry->etype & UVM_ET_FREEMAPPED) != 0 || uaddr == NULL);
  391         KASSERT(uvm_map_uaddr_e(map, entry) == uaddr);
  392         UVM_MAP_REQ_WRITE(map);
  393 
  394         if (uaddr != NULL) {
  395                 fun = uaddr->uaddr_functions;
  396                 if (fun->uaddr_free_remove != NULL)
  397                         (*fun->uaddr_free_remove)(map, uaddr, entry);
  398                 entry->etype &= ~UVM_ET_FREEMAPPED;
  399         }
  400 }
  401 
  402 /*
  403  * Handle address tree insertion.
  404  */
  405 void
  406 uvm_mapent_addr_insert(struct vm_map *map, struct vm_map_entry *entry)
  407 {
  408         struct vm_map_entry *res;
  409 
  410         if (!RBT_CHECK(uvm_map_addr, entry, UVMMAP_DEADBEEF))
  411                 panic("uvm_mapent_addr_insert: entry still in addr list");
  412         KDASSERT(entry->start <= entry->end);
  413         KDASSERT((entry->start & (vaddr_t)PAGE_MASK) == 0 &&
  414             (entry->end & (vaddr_t)PAGE_MASK) == 0);
  415 
  416         TRACEPOINT(uvm, map_insert,
  417             entry->start, entry->end, entry->protection, NULL);
  418 
  419         UVM_MAP_REQ_WRITE(map);
  420         res = RBT_INSERT(uvm_map_addr, &map->addr, entry);
  421         if (res != NULL) {
  422                 panic("uvm_mapent_addr_insert: map %p entry %p "
  423                     "(0x%lx-0x%lx G=0x%lx F=0x%lx) insert collision "
  424                     "with entry %p (0x%lx-0x%lx G=0x%lx F=0x%lx)",
  425                     map, entry,
  426                     entry->start, entry->end, entry->guard, entry->fspace,
  427                     res, res->start, res->end, res->guard, res->fspace);
  428         }
  429 }
  430 
  431 /*
  432  * Handle address tree removal.
  433  */
  434 void
  435 uvm_mapent_addr_remove(struct vm_map *map, struct vm_map_entry *entry)
  436 {
  437         struct vm_map_entry *res;
  438 
  439         TRACEPOINT(uvm, map_remove,
  440             entry->start, entry->end, entry->protection, NULL);
  441 
  442         UVM_MAP_REQ_WRITE(map);
  443         res = RBT_REMOVE(uvm_map_addr, &map->addr, entry);
  444         if (res != entry)
  445                 panic("uvm_mapent_addr_remove");
  446         RBT_POISON(uvm_map_addr, entry, UVMMAP_DEADBEEF);
  447 }
  448 
  449 /*
  450  * uvm_map_reference: add reference to a map
  451  *
  452  * => map need not be locked
  453  */
  454 void
  455 uvm_map_reference(struct vm_map *map)
  456 {
  457         atomic_inc_int(&map->ref_count);
  458 }
  459 
  460 void
  461 uvm_map_lock_entry(struct vm_map_entry *entry)
  462 {
  463         if (entry->aref.ar_amap != NULL) {
  464                 amap_lock(entry->aref.ar_amap);
  465         }
  466         if (UVM_ET_ISOBJ(entry)) {
  467                 rw_enter(entry->object.uvm_obj->vmobjlock, RW_WRITE);
  468         }
  469 }
  470 
  471 void
  472 uvm_map_unlock_entry(struct vm_map_entry *entry)
  473 {
  474         if (UVM_ET_ISOBJ(entry)) {
  475                 rw_exit(entry->object.uvm_obj->vmobjlock);
  476         }
  477         if (entry->aref.ar_amap != NULL) {
  478                 amap_unlock(entry->aref.ar_amap);
  479         }
  480 }
  481 
  482 /*
  483  * Calculate the dused delta.
  484  */
  485 vsize_t
  486 uvmspace_dused(struct vm_map *map, vaddr_t min, vaddr_t max)
  487 {
  488         struct vmspace *vm;
  489         vsize_t sz;
  490         vaddr_t lmax;
  491         vaddr_t stack_begin, stack_end; /* Position of stack. */
  492 
  493         KASSERT(map->flags & VM_MAP_ISVMSPACE);
  494         vm_map_assert_anylock(map);
  495 
  496         vm = (struct vmspace *)map;
  497         stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
  498         stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
  499 
  500         sz = 0;
  501         while (min != max) {
  502                 lmax = max;
  503                 if (min < stack_begin && lmax > stack_begin)
  504                         lmax = stack_begin;
  505                 else if (min < stack_end && lmax > stack_end)
  506                         lmax = stack_end;
  507 
  508                 if (min >= stack_begin && min < stack_end) {
  509                         /* nothing */
  510                 } else
  511                         sz += lmax - min;
  512                 min = lmax;
  513         }
  514 
  515         return sz >> PAGE_SHIFT;
  516 }
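
/*
 * Editor's worked example (not part of the original source), with invented
 * addresses and a 4 KiB page size: for min = 0x1000, max = 0x9000 and a
 * stack occupying [0x4000, 0x6000), the loop above counts
 * (0x4000 - 0x1000) + (0x9000 - 0x6000) = 0x6000 bytes, i.e. the range with
 * the stack portion excluded, and returns 0x6000 >> PAGE_SHIFT = 6 pages.
 */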
  517 
  518 /*
  519  * Find the entry describing the given address.
  520  */
  521 struct vm_map_entry*
  522 uvm_map_entrybyaddr(struct uvm_map_addr *atree, vaddr_t addr)
  523 {
  524         struct vm_map_entry *iter;
  525 
  526         iter = RBT_ROOT(uvm_map_addr, atree);
  527         while (iter != NULL) {
  528                 if (iter->start > addr)
  529                         iter = RBT_LEFT(uvm_map_addr, iter);
  530                 else if (VMMAP_FREE_END(iter) <= addr)
  531                         iter = RBT_RIGHT(uvm_map_addr, iter);
  532                 else
  533                         return iter;
  534         }
  535         return NULL;
  536 }
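
/*
 * Editor's note (not part of the original source): because each entry also
 * "owns" the free space behind it (up to VMMAP_FREE_END()), this lookup
 * returns the entry whose allocated range or trailing free space contains
 * addr.  Invented example: an entry with start = 0x1000, end = 0x3000 and
 * VMMAP_FREE_END() = 0x6000 is returned for any addr in [0x1000, 0x6000).
 */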
  537 
  538 /*
  539  * DEAD_ENTRY_PUSH(struct vm_map_deadq *deadq, struct vm_map_entry *entry)
  540  *
  541  * Push dead entries into a linked list.
  542  * Since the linked list abuses the address tree for storage, the entry
  543  * must not be linked into a map while it is on the dead queue.
  544  *
  545  * The dead queue must be initialized with TAILQ_INIT() before the first
  546  * call to this macro; uvm_unmap_detach(deadq, 0) will release the entries.
  547  */
  548 static inline void
  549 dead_entry_push(struct uvm_map_deadq *deadq, struct vm_map_entry *entry)
  550 {
  551         TAILQ_INSERT_TAIL(deadq, entry, dfree.deadq);
  552 }
  553 #define DEAD_ENTRY_PUSH(_headptr, _entry)                               \
  554         dead_entry_push((_headptr), (_entry))
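
/*
 * Editor's illustrative sketch (not part of the original source): the typical
 * life cycle of a dead-entry queue, as used by the mapping functions below.
 *
 *	struct uvm_map_deadq dead;
 *
 *	TAILQ_INIT(&dead);
 *	vm_map_lock(map);
 *	...tree surgery pushes removed entries with DEAD_ENTRY_PUSH(&dead, e)...
 *	vm_map_unlock(map);
 *	uvm_unmap_detach(&dead, 0);	(release the entries outside the map lock)
 */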
  555 
  556 /*
  557  * Test if memory starting at addr with sz bytes is free.
  558  *
  559  * Fills in *start_ptr and *end_ptr to be the first and last entry describing
  560  * the space.
  561  * If called with prefilled *start_ptr and *end_ptr, they must already be correct.
  562  */
  563 int
  564 uvm_map_isavail(struct vm_map *map, struct uvm_addr_state *uaddr,
  565     struct vm_map_entry **start_ptr, struct vm_map_entry **end_ptr,
  566     vaddr_t addr, vsize_t sz)
  567 {
  568         struct uvm_addr_state *free;
  569         struct uvm_map_addr *atree;
  570         struct vm_map_entry *i, *i_end;
  571 
  572         if (addr + sz < addr)
  573                 return 0;
  574 
  575         vm_map_assert_anylock(map);
  576 
  577         /*
  578          * Kernel memory above uvm_maxkaddr is considered unavailable.
  579          */
  580         if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
  581                 if (addr + sz > uvm_maxkaddr)
  582                         return 0;
  583         }
  584 
  585         atree = &map->addr;
  586 
  587         /*
  588          * Fill in first, last, so they point at the entries containing the
  589          * first and last address of the range.
  590          * Note that if they are not NULL, we don't perform the lookup.
  591          */
  592         KDASSERT(atree != NULL && start_ptr != NULL && end_ptr != NULL);
  593         if (*start_ptr == NULL) {
  594                 *start_ptr = uvm_map_entrybyaddr(atree, addr);
  595                 if (*start_ptr == NULL)
  596                         return 0;
  597         } else
  598                 KASSERT(*start_ptr == uvm_map_entrybyaddr(atree, addr));
  599         if (*end_ptr == NULL) {
  600                 if (VMMAP_FREE_END(*start_ptr) >= addr + sz)
  601                         *end_ptr = *start_ptr;
  602                 else {
  603                         *end_ptr = uvm_map_entrybyaddr(atree, addr + sz - 1);
  604                         if (*end_ptr == NULL)
  605                                 return 0;
  606                 }
  607         } else
  608                 KASSERT(*end_ptr == uvm_map_entrybyaddr(atree, addr + sz - 1));
  609 
  610         /* Validation. */
  611         KDASSERT(*start_ptr != NULL && *end_ptr != NULL);
  612         KDASSERT((*start_ptr)->start <= addr &&
  613             VMMAP_FREE_END(*start_ptr) > addr &&
  614             (*end_ptr)->start < addr + sz &&
  615             VMMAP_FREE_END(*end_ptr) >= addr + sz);
  616 
  617         /*
  618  * Check that none of the entries intersects with <addr, addr+sz>.
  619  * Also, if the entry belongs to uaddr_exe or uaddr_brk_stack, it is
  620          * considered unavailable unless called by those allocators.
  621          */
  622         i = *start_ptr;
  623         i_end = RBT_NEXT(uvm_map_addr, *end_ptr);
  624         for (; i != i_end;
  625             i = RBT_NEXT(uvm_map_addr, i)) {
  626                 if (i->start != i->end && i->end > addr)
  627                         return 0;
  628 
  629                 /*
  630                  * uaddr_exe and uaddr_brk_stack may only be used
  631                  * by these allocators and the NULL uaddr (i.e. no
  632                  * uaddr).
  633                  * Reject if this requirement is not met.
  634                  */
  635                 if (uaddr != NULL) {
  636                         free = uvm_map_uaddr_e(map, i);
  637 
  638                         if (uaddr != free && free != NULL &&
  639                             (free == map->uaddr_exe ||
  640                              free == map->uaddr_brk_stack))
  641                                 return 0;
  642                 }
  643         }
  644 
  645         return -1;
  646 }
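
/*
 * Editor's note (not part of the original source): uvm_map_isavail() returns
 * non-zero (-1) when the range is free and 0 otherwise, so callers in this
 * file simply test the truth value, e.g.:
 *
 *	first = last = NULL;
 *	if (!uvm_map_isavail(map, NULL, &first, &last, addr, sz))
 *		return ENOMEM;	(some part of [addr, addr + sz) is in use)
 */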
  647 
  648 /*
  649  * Invoke each address selector until an address is found.
  650  * Will not invoke uaddr_exe.
  651  */
  652 int
  653 uvm_map_findspace(struct vm_map *map, struct vm_map_entry**first,
  654     struct vm_map_entry**last, vaddr_t *addr, vsize_t sz,
  655     vaddr_t pmap_align, vaddr_t pmap_offset, vm_prot_t prot, vaddr_t hint)
  656 {
  657         struct uvm_addr_state *uaddr;
  658         int i;
  659 
  660         /*
  661          * Allocation for sz bytes at any address,
  662          * using the addr selectors in order.
  663          */
  664         for (i = 0; i < nitems(map->uaddr_any); i++) {
  665                 uaddr = map->uaddr_any[i];
  666 
  667                 if (uvm_addr_invoke(map, uaddr, first, last,
  668                     addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
  669                         return 0;
  670         }
  671 
  672         /* Fall back to brk() and stack() address selectors. */
  673         uaddr = map->uaddr_brk_stack;
  674         if (uvm_addr_invoke(map, uaddr, first, last,
  675             addr, sz, pmap_align, pmap_offset, prot, hint) == 0)
  676                 return 0;
  677 
  678         return ENOMEM;
  679 }
  680 
  681 /* Calculate entry augmentation value. */
  682 vsize_t
  683 uvm_map_addr_augment_get(struct vm_map_entry *entry)
  684 {
  685         vsize_t                  augment;
  686         struct vm_map_entry     *left, *right;
  687 
  688         augment = entry->fspace;
  689         if ((left = RBT_LEFT(uvm_map_addr, entry)) != NULL)
  690                 augment = MAX(augment, left->fspace_augment);
  691         if ((right = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
  692                 augment = MAX(augment, right->fspace_augment);
  693         return augment;
  694 }
  695 
  696 /*
  697  * Update augmentation data in entry.
  698  */
  699 void
  700 uvm_map_addr_augment(struct vm_map_entry *entry)
  701 {
  702         vsize_t                  augment;
  703 
  704         while (entry != NULL) {
  705                 /* Calculate value for augmentation. */
  706                 augment = uvm_map_addr_augment_get(entry);
  707 
  708                 /*
  709                  * Propagate the update toward the root.
  710                  * Once we find an entry that already has the correct value,
  711                  * stop, since it means all its parents will use the correct
  712                  * value too.
  713                  */
  714                 if (entry->fspace_augment == augment)
  715                         return;
  716                 entry->fspace_augment = augment;
  717                 entry = RBT_PARENT(uvm_map_addr, entry);
  718         }
  719 }
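
/*
 * Editor's note (not part of the original source): fspace_augment caches the
 * largest fspace value in an entry's subtree, so an address selector can
 * prune whole subtrees while searching for a gap of at least sz bytes.  A
 * minimal sketch of such a descent (the real selectors are reached through
 * uvm_addr_invoke()):
 *
 *	struct vm_map_entry *left;
 *
 *	if ((left = RBT_LEFT(uvm_map_addr, iter)) != NULL &&
 *	    left->fspace_augment >= sz)
 *		iter = left;	(a large-enough gap may exist on the left)
 *	else if (iter->fspace >= sz)
 *		...use the free space behind iter itself...
 *	else
 *		iter = RBT_RIGHT(uvm_map_addr, iter);
 */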
  720 
  721 /*
  722  * uvm_mapanon: establish a valid mapping in map for an anon
  723  *
  724  * => *addr and sz must be a multiple of PAGE_SIZE.
  725  * => *addr is ignored, except if flags contains UVM_FLAG_FIXED.
  726  * => map must be unlocked.
  727  *
  728  * => align: align vaddr, must be a power-of-2.
  729  *    Align is only a hint and will be ignored if the alignment fails.
  730  */
  731 int
  732 uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz,
  733     vsize_t align, unsigned int flags)
  734 {
  735         struct vm_map_entry     *first, *last, *entry, *new;
  736         struct uvm_map_deadq     dead;
  737         vm_prot_t                prot;
  738         vm_prot_t                maxprot;
  739         vm_inherit_t             inherit;
  740         int                      advice;
  741         int                      error;
  742         vaddr_t                  pmap_align, pmap_offset;
  743         vaddr_t                  hint;
  744 
  745         KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE);
  746         KASSERT(map != kernel_map);
  747         KASSERT((map->flags & UVM_FLAG_HOLE) == 0);
  748         KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
  749         splassert(IPL_NONE);
  750         KASSERT((flags & UVM_FLAG_TRYLOCK) == 0);
  751 
  752         /*
  753          * We use pmap_align and pmap_offset as alignment and offset variables.
  754          *
  755  * Because the align parameter takes precedence over pmap_prefer,
  756  * pmap_align is set to align (and pmap_offset to 0) whenever pmap_prefer
  757  * cannot provide the requested alignment.
  758          */
  759         pmap_align = MAX(align, PAGE_SIZE);
  760         pmap_offset = 0;
  761 
  762         /* Decode parameters. */
  763         prot = UVM_PROTECTION(flags);
  764         maxprot = UVM_MAXPROTECTION(flags);
  765         advice = UVM_ADVICE(flags);
  766         inherit = UVM_INHERIT(flags);
  767         error = 0;
  768         hint = trunc_page(*addr);
  769         TAILQ_INIT(&dead);
  770         KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
  771         KASSERT((align & (align - 1)) == 0);
  772 
  773         /* Check protection. */
  774         if ((prot & maxprot) != prot)
  775                 return EACCES;
  776 
  777         /*
  778          * Before grabbing the lock, allocate a map entry for later
  779          * use to ensure we don't wait for memory while holding the
  780          * vm_map_lock.
  781          */
  782         new = uvm_mapent_alloc(map, flags);
  783         if (new == NULL)
  784                 return ENOMEM;
  785 
  786         vm_map_lock(map);
  787         first = last = NULL;
  788         if (flags & UVM_FLAG_FIXED) {
  789                 /*
  790                  * Fixed location.
  791                  *
  792                  * Note: we ignore align, pmap_prefer.
  793                  * Fill in first, last and *addr.
  794                  */
  795                 KASSERT((*addr & PAGE_MASK) == 0);
  796 
  797                 /* Check that the space is available. */
  798                 if (flags & UVM_FLAG_UNMAP) {
  799                         if ((flags & UVM_FLAG_STACK) &&
  800                             !uvm_map_is_stack_remappable(map, *addr, sz,
  801                                 (flags & UVM_FLAG_SIGALTSTACK))) {
  802                                 error = EINVAL;
  803                                 goto unlock;
  804                         }
  805                         if (uvm_unmap_remove(map, *addr, *addr + sz, &dead,
  806                             FALSE, TRUE,
  807                             (flags & UVM_FLAG_SIGALTSTACK) ? FALSE : TRUE) != 0) {
  808                                 error = EPERM;  /* immutable entries found */
  809                                 goto unlock;
  810                         }
  811                 }
  812                 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
  813                         error = ENOMEM;
  814                         goto unlock;
  815                 }
  816         } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
  817             (align == 0 || (*addr & (align - 1)) == 0) &&
  818             uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
  819                 /*
  820                  * Address used as hint.
  821                  *
  822                  * Note: we enforce the alignment restriction,
  823                  * but ignore pmap_prefer.
  824                  */
  825         } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
  826                 /* Run selection algorithm for executables. */
  827                 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
  828                     addr, sz, pmap_align, pmap_offset, prot, hint);
  829 
  830                 if (error != 0)
  831                         goto unlock;
  832         } else {
  833                 /* Update freelists from vmspace. */
  834                 uvm_map_vmspace_update(map, &dead, flags);
  835 
  836                 error = uvm_map_findspace(map, &first, &last, addr, sz,
  837                     pmap_align, pmap_offset, prot, hint);
  838 
  839                 if (error != 0)
  840                         goto unlock;
  841         }
  842 
  843         /* Double-check that the selected address does not overflow. */
  844         if (*addr + sz < *addr) {
  845                 error = ENOMEM;
  846                 goto unlock;
  847         }
  848 
  849         /* If we only want a query, return now. */
  850         if (flags & UVM_FLAG_QUERY) {
  851                 error = 0;
  852                 goto unlock;
  853         }
  854 
  855         /*
  856          * Create new entry.
  857          * first and last may be invalidated after this call.
  858          */
  859         entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
  860             new);
  861         if (entry == NULL) {
  862                 error = ENOMEM;
  863                 goto unlock;
  864         }
  865         new = NULL;
  866         KDASSERT(entry->start == *addr && entry->end == *addr + sz);
  867         entry->object.uvm_obj = NULL;
  868         entry->offset = 0;
  869         entry->protection = prot;
  870         entry->max_protection = maxprot;
  871         entry->inheritance = inherit;
  872         entry->wired_count = 0;
  873         entry->advice = advice;
  874         if (prot & PROT_WRITE)
  875                 map->wserial++;
  876         if (flags & UVM_FLAG_SYSCALL) {
  877                 entry->etype |= UVM_ET_SYSCALL;
  878                 map->wserial++;
  879         }
  880         if (flags & UVM_FLAG_STACK) {
  881                 entry->etype |= UVM_ET_STACK;
  882                 if (flags & (UVM_FLAG_FIXED | UVM_FLAG_UNMAP))
  883                         map->sserial++;
  884         }
  885         if (flags & UVM_FLAG_COPYONW) {
  886                 entry->etype |= UVM_ET_COPYONWRITE;
  887                 if ((flags & UVM_FLAG_OVERLAY) == 0)
  888                         entry->etype |= UVM_ET_NEEDSCOPY;
  889         }
  890         if (flags & UVM_FLAG_CONCEAL)
  891                 entry->etype |= UVM_ET_CONCEAL;
  892         if (flags & UVM_FLAG_OVERLAY) {
  893                 entry->aref.ar_pageoff = 0;
  894                 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
  895         }
  896 
  897         /* Update map and process statistics. */
  898         map->size += sz;
  899         if (prot != PROT_NONE) {
  900                 ((struct vmspace *)map)->vm_dused +=
  901                     uvmspace_dused(map, *addr, *addr + sz);
  902         }
  903 
  904 unlock:
  905         vm_map_unlock(map);
  906 
  907         /*
  908          * Remove dead entries.
  909          *
  910          * Dead entries may be the result of merging.
  911          * uvm_map_mkentry may also create dead entries, when it attempts to
  912          * destroy free-space entries.
  913          */
  914         uvm_unmap_detach(&dead, 0);
  915 
  916         if (new)
  917                 uvm_mapent_free(new);
  918         return error;
  919 }
  920 
  921 /*
  922  * uvm_map: establish a valid mapping in map
  923  *
  924  * => *addr and sz must be a multiple of PAGE_SIZE.
  925  * => map must be unlocked.
  926  * => <uobj,uoffset> value meanings (4 cases):
  927  *      [1] <NULL,uoffset>              == uoffset is a hint for PMAP_PREFER
  928  *      [2] <NULL,UVM_UNKNOWN_OFFSET>   == don't PMAP_PREFER
  929  *      [3] <uobj,uoffset>              == normal mapping
  930  *      [4] <uobj,UVM_UNKNOWN_OFFSET>   == uvm_map finds offset based on VA
  931  *
  932  *   case [4] is for kernel mappings where we don't know the offset until
  933  *   we've found a virtual address.   note that kernel object offsets are
  934  *   always relative to vm_map_min(kernel_map).
  935  *
  936  * => align: align vaddr, must be a power-of-2.
  937  *    Align is only a hint and will be ignored if the alignment fails.
  938  */
  939 int
  940 uvm_map(struct vm_map *map, vaddr_t *addr, vsize_t sz,
  941     struct uvm_object *uobj, voff_t uoffset,
  942     vsize_t align, unsigned int flags)
  943 {
  944         struct vm_map_entry     *first, *last, *entry, *new;
  945         struct uvm_map_deadq     dead;
  946         vm_prot_t                prot;
  947         vm_prot_t                maxprot;
  948         vm_inherit_t             inherit;
  949         int                      advice;
  950         int                      error;
  951         vaddr_t                  pmap_align, pmap_offset;
  952         vaddr_t                  hint;
  953 
  954         if ((map->flags & VM_MAP_INTRSAFE) == 0)
  955                 splassert(IPL_NONE);
  956         else
  957                 splassert(IPL_VM);
  958 
  959         /*
  960          * We use pmap_align and pmap_offset as alignment and offset variables.
  961          *
  962  * Because the align parameter takes precedence over pmap_prefer,
  963  * pmap_align is set to align (and pmap_offset to 0) whenever pmap_prefer
  964  * cannot provide the requested alignment.
  965          */
  966         if (uoffset == UVM_UNKNOWN_OFFSET) {
  967                 pmap_align = MAX(align, PAGE_SIZE);
  968                 pmap_offset = 0;
  969         } else {
  970                 pmap_align = MAX(PMAP_PREFER_ALIGN(), PAGE_SIZE);
  971                 pmap_offset = PMAP_PREFER_OFFSET(uoffset);
  972 
  973                 if (align == 0 ||
  974                     (align <= pmap_align && (pmap_offset & (align - 1)) == 0)) {
  975                         /* pmap_offset satisfies align, no change. */
  976                 } else {
  977                         /* Align takes precedence over pmap prefer. */
  978                         pmap_align = align;
  979                         pmap_offset = 0;
  980                 }
  981         }
  982 
  983         /* Decode parameters. */
  984         prot = UVM_PROTECTION(flags);
  985         maxprot = UVM_MAXPROTECTION(flags);
  986         advice = UVM_ADVICE(flags);
  987         inherit = UVM_INHERIT(flags);
  988         error = 0;
  989         hint = trunc_page(*addr);
  990         TAILQ_INIT(&dead);
  991         KASSERT((sz & (vaddr_t)PAGE_MASK) == 0);
  992         KASSERT((align & (align - 1)) == 0);
  993 
  994         /* Holes are incompatible with other types of mappings. */
  995         if (flags & UVM_FLAG_HOLE) {
  996                 KASSERT(uobj == NULL && (flags & UVM_FLAG_FIXED) &&
  997                     (flags & (UVM_FLAG_OVERLAY | UVM_FLAG_COPYONW)) == 0);
  998         }
  999 
 1000         /* Unset hint for kernel_map non-fixed allocations. */
 1001         if (!(map->flags & VM_MAP_ISVMSPACE) && !(flags & UVM_FLAG_FIXED))
 1002                 hint = 0;
 1003 
 1004         /* Check protection. */
 1005         if ((prot & maxprot) != prot)
 1006                 return EACCES;
 1007 
 1008         if (map == kernel_map &&
 1009             (prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
 1010                 panic("uvm_map: kernel map W^X violation requested");
 1011 
 1012         /*
 1013          * Before grabbing the lock, allocate a map entry for later
 1014          * use to ensure we don't wait for memory while holding the
 1015          * vm_map_lock.
 1016          */
 1017         new = uvm_mapent_alloc(map, flags);
 1018         if (new == NULL)
 1019                 return ENOMEM;
 1020 
 1021         if (flags & UVM_FLAG_TRYLOCK) {
 1022                 if (vm_map_lock_try(map) == FALSE) {
 1023                         error = EFAULT;
 1024                         goto out;
 1025                 }
 1026         } else {
 1027                 vm_map_lock(map);
 1028         }
 1029 
 1030         first = last = NULL;
 1031         if (flags & UVM_FLAG_FIXED) {
 1032                 /*
 1033                  * Fixed location.
 1034                  *
 1035                  * Note: we ignore align, pmap_prefer.
 1036                  * Fill in first, last and *addr.
 1037                  */
 1038                 KASSERT((*addr & PAGE_MASK) == 0);
 1039 
 1040                 /*
 1041                  * Grow pmap to include allocated address.
 1042                  * If the growth fails, the allocation will fail too.
 1043                  */
 1044                 if ((map->flags & VM_MAP_ISVMSPACE) == 0 &&
 1045                     uvm_maxkaddr < (*addr + sz)) {
 1046                         uvm_map_kmem_grow(map, &dead,
 1047                             *addr + sz - uvm_maxkaddr, flags);
 1048                 }
 1049 
 1050                 /* Check that the space is available. */
 1051                 if (flags & UVM_FLAG_UNMAP) {
 1052                         if (uvm_unmap_remove(map, *addr, *addr + sz, &dead,
 1053                             FALSE, TRUE, TRUE) != 0) {
 1054                                 error = EPERM;  /* immutable entries found */
 1055                                 goto unlock;
 1056                         }
 1057                 }
 1058                 if (!uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
 1059                         error = ENOMEM;
 1060                         goto unlock;
 1061                 }
 1062         } else if (*addr != 0 && (*addr & PAGE_MASK) == 0 &&
 1063             (map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE &&
 1064             (align == 0 || (*addr & (align - 1)) == 0) &&
 1065             uvm_map_isavail(map, NULL, &first, &last, *addr, sz)) {
 1066                 /*
 1067                  * Address used as hint.
 1068                  *
 1069                  * Note: we enforce the alignment restriction,
 1070                  * but ignore pmap_prefer.
 1071                  */
 1072         } else if ((prot & PROT_EXEC) != 0 && map->uaddr_exe != NULL) {
 1073                 /* Run selection algorithm for executables. */
 1074                 error = uvm_addr_invoke(map, map->uaddr_exe, &first, &last,
 1075                     addr, sz, pmap_align, pmap_offset, prot, hint);
 1076 
 1077                 /* Grow kernel memory and try again. */
 1078                 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
 1079                         uvm_map_kmem_grow(map, &dead, sz, flags);
 1080 
 1081                         error = uvm_addr_invoke(map, map->uaddr_exe,
 1082                             &first, &last, addr, sz,
 1083                             pmap_align, pmap_offset, prot, hint);
 1084                 }
 1085 
 1086                 if (error != 0)
 1087                         goto unlock;
 1088         } else {
 1089                 /* Update freelists from vmspace. */
 1090                 if (map->flags & VM_MAP_ISVMSPACE)
 1091                         uvm_map_vmspace_update(map, &dead, flags);
 1092 
 1093                 error = uvm_map_findspace(map, &first, &last, addr, sz,
 1094                     pmap_align, pmap_offset, prot, hint);
 1095 
 1096                 /* Grow kernel memory and try again. */
 1097                 if (error != 0 && (map->flags & VM_MAP_ISVMSPACE) == 0) {
 1098                         uvm_map_kmem_grow(map, &dead, sz, flags);
 1099 
 1100                         error = uvm_map_findspace(map, &first, &last, addr, sz,
 1101                             pmap_align, pmap_offset, prot, hint);
 1102                 }
 1103 
 1104                 if (error != 0)
 1105                         goto unlock;
 1106         }
 1107 
 1108         /* Double-check that the selected address does not overflow. */
 1109         if (*addr + sz < *addr) {
 1110                 error = ENOMEM;
 1111                 goto unlock;
 1112         }
 1113 
 1114         KASSERT((map->flags & VM_MAP_ISVMSPACE) == VM_MAP_ISVMSPACE ||
 1115             uvm_maxkaddr >= *addr + sz);
 1116 
 1117         /* If we only want a query, return now. */
 1118         if (flags & UVM_FLAG_QUERY) {
 1119                 error = 0;
 1120                 goto unlock;
 1121         }
 1122 
 1123         if (uobj == NULL)
 1124                 uoffset = 0;
 1125         else if (uoffset == UVM_UNKNOWN_OFFSET) {
 1126                 KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
 1127                 uoffset = *addr - vm_map_min(kernel_map);
 1128         }
 1129 
 1130         /*
 1131          * Create new entry.
 1132          * first and last may be invalidated after this call.
 1133          */
 1134         entry = uvm_map_mkentry(map, first, last, *addr, sz, flags, &dead,
 1135             new);
 1136         if (entry == NULL) {
 1137                 error = ENOMEM;
 1138                 goto unlock;
 1139         }
 1140         new = NULL;
 1141         KDASSERT(entry->start == *addr && entry->end == *addr + sz);
 1142         entry->object.uvm_obj = uobj;
 1143         entry->offset = uoffset;
 1144         entry->protection = prot;
 1145         entry->max_protection = maxprot;
 1146         entry->inheritance = inherit;
 1147         entry->wired_count = 0;
 1148         entry->advice = advice;
 1149         if (prot & PROT_WRITE)
 1150                 map->wserial++;
 1151         if (flags & UVM_FLAG_SYSCALL) {
 1152                 entry->etype |= UVM_ET_SYSCALL;
 1153                 map->wserial++;
 1154         }
 1155         if (flags & UVM_FLAG_STACK) {
 1156                 entry->etype |= UVM_ET_STACK;
 1157                 if (flags & UVM_FLAG_UNMAP)
 1158                         map->sserial++;
 1159         }
 1160         if (uobj)
 1161                 entry->etype |= UVM_ET_OBJ;
 1162         else if (flags & UVM_FLAG_HOLE)
 1163                 entry->etype |= UVM_ET_HOLE;
 1164         if (flags & UVM_FLAG_NOFAULT)
 1165                 entry->etype |= UVM_ET_NOFAULT;
 1166         if (flags & UVM_FLAG_WC)
 1167                 entry->etype |= UVM_ET_WC;
 1168         if (flags & UVM_FLAG_COPYONW) {
 1169                 entry->etype |= UVM_ET_COPYONWRITE;
 1170                 if ((flags & UVM_FLAG_OVERLAY) == 0)
 1171                         entry->etype |= UVM_ET_NEEDSCOPY;
 1172         }
 1173         if (flags & UVM_FLAG_CONCEAL)
 1174                 entry->etype |= UVM_ET_CONCEAL;
 1175         if (flags & UVM_FLAG_OVERLAY) {
 1176                 entry->aref.ar_pageoff = 0;
 1177                 entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
 1178         }
 1179 
 1180         /* Update map and process statistics. */
 1181         if (!(flags & UVM_FLAG_HOLE)) {
 1182                 map->size += sz;
 1183                 if ((map->flags & VM_MAP_ISVMSPACE) && uobj == NULL &&
 1184                     prot != PROT_NONE) {
 1185                         ((struct vmspace *)map)->vm_dused +=
 1186                             uvmspace_dused(map, *addr, *addr + sz);
 1187                 }
 1188         }
 1189 
 1190         /*
 1191          * Try to merge entry.
 1192          *
 1193          * Userland allocations are usually kept separate, so forgo the
 1194          * effort of merging what usually cannot be merged; only attempt
 1195          * the merge if it concerns a kernel entry.
 1196          */
 1197         if ((flags & UVM_FLAG_NOMERGE) == 0 &&
 1198             (map->flags & VM_MAP_ISVMSPACE) == 0)
 1199                 uvm_mapent_tryjoin(map, entry, &dead);
 1200 
 1201 unlock:
 1202         vm_map_unlock(map);
 1203 
 1204         /*
 1205          * Remove dead entries.
 1206          *
 1207          * Dead entries may be the result of merging.
 1208          * uvm_map_mkentry may also create dead entries, when it attempts to
 1209          * destroy free-space entries.
 1210          */
 1211         if (map->flags & VM_MAP_INTRSAFE)
 1212                 uvm_unmap_detach_intrsafe(&dead);
 1213         else
 1214                 uvm_unmap_detach(&dead, 0);
 1215 out:
 1216         if (new)
 1217                 uvm_mapent_free(new);
 1218         return error;
 1219 }
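
/*
 * Editor's illustrative sketch (not part of the original source): how a
 * kernel caller might establish an anonymous kernel mapping through
 * uvm_map(), i.e. case [2] above.  The UVM_MAPFLAG() encoding macro and the
 * protection/inherit/advice constants come from headers outside this file
 * and are assumed here.
 *
 *	vaddr_t va = 0;
 *	int error;
 *
 *	error = uvm_map(kernel_map, &va, round_page(size), NULL,
 *	    UVM_UNKNOWN_OFFSET, 0,
 *	    UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
 *	    MAP_INHERIT_NONE, MADV_RANDOM, 0));
 *	if (error)
 *		...no virtual address space available...
 */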
 1220 
 1221 /*
 1222  * True iff e1 and e2 can be joined together.
 1223  */
 1224 int
 1225 uvm_mapent_isjoinable(struct vm_map *map, struct vm_map_entry *e1,
 1226     struct vm_map_entry *e2)
 1227 {
 1228         KDASSERT(e1 != NULL && e2 != NULL);
 1229 
 1230         /* Must be the same entry type and not have free memory between. */
 1231         if (e1->etype != e2->etype || e1->end != e2->start)
 1232                 return 0;
 1233 
 1234         /* Submaps are never joined. */
 1235         if (UVM_ET_ISSUBMAP(e1))
 1236                 return 0;
 1237 
 1238         /* Never merge wired memory. */
 1239         if (VM_MAPENT_ISWIRED(e1) || VM_MAPENT_ISWIRED(e2))
 1240                 return 0;
 1241 
 1242         /* Protection, inheritance and advice must be equal. */
 1243         if (e1->protection != e2->protection ||
 1244             e1->max_protection != e2->max_protection ||
 1245             e1->inheritance != e2->inheritance ||
 1246             e1->advice != e2->advice)
 1247                 return 0;
 1248 
 1249         /* If uvm_object: object itself and offsets within object must match. */
 1250         if (UVM_ET_ISOBJ(e1)) {
 1251                 if (e1->object.uvm_obj != e2->object.uvm_obj)
 1252                         return 0;
 1253                 if (e1->offset + (e1->end - e1->start) != e2->offset)
 1254                         return 0;
 1255         }
 1256 
 1257         /*
 1258          * Cannot join shared amaps.
 1259          * Note: no need to lock amap to look at refs, since we don't care
 1260          * about its exact value.
 1261          * If it is 1 (i.e. we have the only reference) it will stay there.
 1262          */
 1263         if (e1->aref.ar_amap && amap_refs(e1->aref.ar_amap) != 1)
 1264                 return 0;
 1265         if (e2->aref.ar_amap && amap_refs(e2->aref.ar_amap) != 1)
 1266                 return 0;
 1267 
 1268         /* Apparently, e1 and e2 match. */
 1269         return 1;
 1270 }
 1271 
 1272 /*
 1273  * Join support function.
 1274  *
 1275  * Returns the merged entry on success.
 1276  * Returns NULL if the merge failed.
 1277  */
 1278 struct vm_map_entry*
 1279 uvm_mapent_merge(struct vm_map *map, struct vm_map_entry *e1,
 1280     struct vm_map_entry *e2, struct uvm_map_deadq *dead)
 1281 {
 1282         struct uvm_addr_state *free;
 1283 
 1284         /*
 1285          * Merging is not supported when e1 has an amap attached.
 1286          * This should never happen anyway, because only kernel
 1287          * entries are merged and those do not carry amaps.
 1288          *
 1289          * Any amap on e2 contains no real information,
 1290          * so e2 can be erased immediately.
 1291          */
 1292         KASSERT(e1->aref.ar_amap == NULL);
 1293 
 1294         /*
 1295          * Don't drop obj reference:
 1296          * uvm_unmap_detach will do this for us.
 1297          */
 1298         free = uvm_map_uaddr_e(map, e1);
 1299         uvm_mapent_free_remove(map, free, e1);
 1300 
 1301         free = uvm_map_uaddr_e(map, e2);
 1302         uvm_mapent_free_remove(map, free, e2);
 1303         uvm_mapent_addr_remove(map, e2);
 1304         e1->end = e2->end;
 1305         e1->guard = e2->guard;
 1306         e1->fspace = e2->fspace;
 1307         uvm_mapent_free_insert(map, free, e1);
 1308 
 1309         DEAD_ENTRY_PUSH(dead, e2);
 1310         return e1;
 1311 }
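
/*
 * Editor's worked example (not part of the original source), with invented
 * addresses: merging e1 = [0x1000, 0x2000) and e2 = [0x2000, 0x5000) (same
 * etype, protection, inheritance and advice, no free space in between)
 * yields a single entry [0x1000, 0x5000) that inherits e2's guard and
 * fspace; e2 itself is pushed onto the dead queue for later release.
 */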
 1312 
 1313 /*
 1314  * Attempt forward and backward joining of entry.
 1315  *
 1316  * Returns entry after joins.
 1317  * We are guaranteed that the amap of entry is either non-existent or
 1318  * has never been used.
 1319  */
 1320 struct vm_map_entry*
 1321 uvm_mapent_tryjoin(struct vm_map *map, struct vm_map_entry *entry,
 1322     struct uvm_map_deadq *dead)
 1323 {
 1324         struct vm_map_entry *other;
 1325         struct vm_map_entry *merged;
 1326 
 1327         /* Merge with previous entry. */
 1328         other = RBT_PREV(uvm_map_addr, entry);
 1329         if (other && uvm_mapent_isjoinable(map, other, entry)) {
 1330                 merged = uvm_mapent_merge(map, other, entry, dead);
 1331                 if (merged)
 1332                         entry = merged;
 1333         }
 1334 
 1335         /*
 1336          * Merge with next entry.
 1337          *
 1338          * Because amap can only extend forward and the next entry
 1339          * probably contains sensible info, only perform forward merging
 1340          * in the absence of an amap.
 1341          */
 1342         other = RBT_NEXT(uvm_map_addr, entry);
 1343         if (other && entry->aref.ar_amap == NULL &&
 1344             other->aref.ar_amap == NULL &&
 1345             uvm_mapent_isjoinable(map, entry, other)) {
 1346                 merged = uvm_mapent_merge(map, entry, other, dead);
 1347                 if (merged)
 1348                         entry = merged;
 1349         }
 1350 
 1351         return entry;
 1352 }
 1353 
 1354 /*
 1355  * Kill entries that are no longer in a map.
 1356  */
 1357 void
 1358 uvm_unmap_detach(struct uvm_map_deadq *deadq, int flags)
 1359 {
 1360         struct vm_map_entry *entry, *tmp;
 1361         int waitok = flags & UVM_PLA_WAITOK;
 1362 
 1363         TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) {
 1364                 /* Drop reference to amap, if we've got one. */
 1365                 if (entry->aref.ar_amap)
 1366                         amap_unref(entry->aref.ar_amap,
 1367                             entry->aref.ar_pageoff,
 1368                             atop(entry->end - entry->start),
 1369                             flags & AMAP_REFALL);
 1370 
 1371                 /* Skip entries for which we have to grab the kernel lock. */
 1372                 if (UVM_ET_ISSUBMAP(entry) || UVM_ET_ISOBJ(entry))
 1373                         continue;
 1374 
 1375                 TAILQ_REMOVE(deadq, entry, dfree.deadq);
 1376                 uvm_mapent_free(entry);
 1377         }
 1378 
 1379         if (TAILQ_EMPTY(deadq))
 1380                 return;
 1381 
 1382         KERNEL_LOCK();
 1383         while ((entry = TAILQ_FIRST(deadq)) != NULL) {
 1384                 if (waitok)
 1385                         uvm_pause();
 1386                 /* Drop reference to our backing object, if we've got one. */
 1387                 if (UVM_ET_ISSUBMAP(entry)) {
 1388                         /* ... unlikely to happen, but play it safe */
 1389                         uvm_map_deallocate(entry->object.sub_map);
 1390                 } else if (UVM_ET_ISOBJ(entry) &&
 1391                     entry->object.uvm_obj->pgops->pgo_detach) {
 1392                         entry->object.uvm_obj->pgops->pgo_detach(
 1393                             entry->object.uvm_obj);
 1394                 }
 1395 
 1396                 /* Step to next. */
 1397                 TAILQ_REMOVE(deadq, entry, dfree.deadq);
 1398                 uvm_mapent_free(entry);
 1399         }
 1400         KERNEL_UNLOCK();
 1401 }
 1402 
 1403 void
 1404 uvm_unmap_detach_intrsafe(struct uvm_map_deadq *deadq)
 1405 {
 1406         struct vm_map_entry *entry;
 1407 
 1408         while ((entry = TAILQ_FIRST(deadq)) != NULL) {
 1409                 KASSERT(entry->aref.ar_amap == NULL);
 1410                 KASSERT(!UVM_ET_ISSUBMAP(entry));
 1411                 KASSERT(!UVM_ET_ISOBJ(entry));
 1412                 TAILQ_REMOVE(deadq, entry, dfree.deadq);
 1413                 uvm_mapent_free(entry);
 1414         }
 1415 }
 1416 
 1417 /*
 1418  * Create and insert new entry.
 1419  *
 1420  * Returned entry contains new addresses and is inserted properly in the tree.
 1421  * first and last are (probably) no longer valid.
 1422  */
 1423 struct vm_map_entry*
 1424 uvm_map_mkentry(struct vm_map *map, struct vm_map_entry *first,
 1425     struct vm_map_entry *last, vaddr_t addr, vsize_t sz, int flags,
 1426     struct uvm_map_deadq *dead, struct vm_map_entry *new)
 1427 {
 1428         struct vm_map_entry *entry, *prev;
 1429         struct uvm_addr_state *free;
 1430         vaddr_t min, max;       /* free space boundaries for new entry */
 1431 
 1432         KDASSERT(map != NULL);
 1433         KDASSERT(first != NULL);
 1434         KDASSERT(last != NULL);
 1435         KDASSERT(dead != NULL);
 1436         KDASSERT(sz > 0);
 1437         KDASSERT(addr + sz > addr);
 1438         KDASSERT(first->end <= addr && VMMAP_FREE_END(first) > addr);
 1439         KDASSERT(last->start < addr + sz && VMMAP_FREE_END(last) >= addr + sz);
 1440         KDASSERT(uvm_map_isavail(map, NULL, &first, &last, addr, sz));
 1441         uvm_tree_sanity(map, __FILE__, __LINE__);
 1442 
 1443         min = addr + sz;
 1444         max = VMMAP_FREE_END(last);
 1445 
 1446         /* Initialize new entry. */
 1447         if (new == NULL)
 1448                 entry = uvm_mapent_alloc(map, flags);
 1449         else
 1450                 entry = new;
 1451         if (entry == NULL)
 1452                 return NULL;
 1453         entry->offset = 0;
 1454         entry->etype = 0;
 1455         entry->wired_count = 0;
 1456         entry->aref.ar_pageoff = 0;
 1457         entry->aref.ar_amap = NULL;
 1458 
 1459         entry->start = addr;
 1460         entry->end = min;
 1461         entry->guard = 0;
 1462         entry->fspace = 0;
 1463 
 1464         vm_map_assert_wrlock(map);
 1465 
 1466         /* Reset free space in first. */
 1467         free = uvm_map_uaddr_e(map, first);
 1468         uvm_mapent_free_remove(map, free, first);
 1469         first->guard = 0;
 1470         first->fspace = 0;
 1471 
 1472         /*
 1473          * Remove all entries that are fully replaced.
 1474          * We are iterating using last in reverse order.
 1475          */
 1476         for (; first != last; last = prev) {
 1477                 prev = RBT_PREV(uvm_map_addr, last);
 1478 
 1479                 KDASSERT(last->start == last->end);
 1480                 free = uvm_map_uaddr_e(map, last);
 1481                 uvm_mapent_free_remove(map, free, last);
 1482                 uvm_mapent_addr_remove(map, last);
 1483                 DEAD_ENTRY_PUSH(dead, last);
 1484         }
 1485         /* Remove first if it is entirely inside <addr, addr+sz>.  */
 1486         if (first->start == addr) {
 1487                 uvm_mapent_addr_remove(map, first);
 1488                 DEAD_ENTRY_PUSH(dead, first);
 1489         } else {
 1490                 uvm_map_fix_space(map, first, VMMAP_FREE_START(first),
 1491                     addr, flags);
 1492         }
 1493 
 1494         /* Finally, link in entry. */
 1495         uvm_mapent_addr_insert(map, entry);
 1496         uvm_map_fix_space(map, entry, min, max, flags);
 1497 
 1498         uvm_tree_sanity(map, __FILE__, __LINE__);
 1499         return entry;
 1500 }
 1501 
 1502 
 1503 /*
 1504  * uvm_mapent_alloc: allocate a map entry
 1505  */
 1506 struct vm_map_entry *
 1507 uvm_mapent_alloc(struct vm_map *map, int flags)
 1508 {
 1509         struct vm_map_entry *me, *ne;
 1510         int pool_flags;
 1511         int i;
 1512 
 1513         pool_flags = PR_WAITOK;
 1514         if (flags & UVM_FLAG_TRYLOCK)
 1515                 pool_flags = PR_NOWAIT;
 1516 
 1517         if (map->flags & VM_MAP_INTRSAFE || cold) {
 1518                 mtx_enter(&uvm_kmapent_mtx);
 1519                 if (SLIST_EMPTY(&uvm.kentry_free)) {
 1520                         ne = km_alloc(PAGE_SIZE, &kv_page, &kp_dirty,
 1521                             &kd_nowait);
 1522                         if (ne == NULL)
 1523                                 panic("uvm_mapent_alloc: cannot allocate map "
 1524                                     "entry");
 1525                         for (i = 0; i < PAGE_SIZE / sizeof(*ne); i++) {
 1526                                 SLIST_INSERT_HEAD(&uvm.kentry_free,
 1527                                     &ne[i], daddrs.addr_kentry);
 1528                         }
 1529                         if (ratecheck(&uvm_kmapent_last_warn_time,
 1530                             &uvm_kmapent_warn_rate))
 1531                                 printf("uvm_mapent_alloc: out of static "
 1532                                     "map entries\n");
 1533                 }
 1534                 me = SLIST_FIRST(&uvm.kentry_free);
 1535                 SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry);
 1536                 uvmexp.kmapent++;
 1537                 mtx_leave(&uvm_kmapent_mtx);
 1538                 me->flags = UVM_MAP_STATIC;
 1539         } else if (map == kernel_map) {
 1540                 splassert(IPL_NONE);
 1541                 me = pool_get(&uvm_map_entry_kmem_pool, pool_flags);
 1542                 if (me == NULL)
 1543                         goto out;
 1544                 me->flags = UVM_MAP_KMEM;
 1545         } else {
 1546                 splassert(IPL_NONE);
 1547                 me = pool_get(&uvm_map_entry_pool, pool_flags);
 1548                 if (me == NULL)
 1549                         goto out;
 1550                 me->flags = 0;
 1551         }
 1552 
 1553         RBT_POISON(uvm_map_addr, me, UVMMAP_DEADBEEF);
 1554 out:
 1555         return me;
 1556 }
 1557 
 1558 /*
 1559  * uvm_mapent_free: free map entry
 1560  *
 1561  * => XXX: static pool for kernel map?
 1562  */
 1563 void
 1564 uvm_mapent_free(struct vm_map_entry *me)
 1565 {
 1566         if (me->flags & UVM_MAP_STATIC) {
 1567                 mtx_enter(&uvm_kmapent_mtx);
 1568                 SLIST_INSERT_HEAD(&uvm.kentry_free, me, daddrs.addr_kentry);
 1569                 uvmexp.kmapent--;
 1570                 mtx_leave(&uvm_kmapent_mtx);
 1571         } else if (me->flags & UVM_MAP_KMEM) {
 1572                 splassert(IPL_NONE);
 1573                 pool_put(&uvm_map_entry_kmem_pool, me);
 1574         } else {
 1575                 splassert(IPL_NONE);
 1576                 pool_put(&uvm_map_entry_pool, me);
 1577         }
 1578 }
 1579 
 1580 /*
 1581  * uvm_map_lookup_entry: find map entry at or before an address.
 1582  *
 1583  * => map must at least be read-locked by caller
 1584  * => entry is returned in "entry"
 1585  * => return value is true if address is in the returned entry
 1586  * ET_HOLE entries are considered not to contain a mapping, so FALSE is
 1587  * returned for them.
 1588  */
 1589 boolean_t
 1590 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
 1591     struct vm_map_entry **entry)
 1592 {
 1593         vm_map_assert_anylock(map);
 1594 
 1595         *entry = uvm_map_entrybyaddr(&map->addr, address);
 1596         return *entry != NULL && !UVM_ET_ISHOLE(*entry) &&
 1597             (*entry)->start <= address && (*entry)->end > address;
 1598 }
 1599 
 1600 /*
 1601  * Stack must be in a MAP_STACK entry. PROT_NONE indicates stack not yet
 1602  * grown -- then uvm_map_check_region_range() should not cache the entry
 1603  * because growth won't be seen.
 1604  */
 1605 int
 1606 uvm_map_inentry_sp(vm_map_entry_t entry)
 1607 {
 1608         if ((entry->etype & UVM_ET_STACK) == 0) {
 1609                 if (entry->protection == PROT_NONE)
 1610                         return (-1);    /* don't update range */
 1611                 return (0);
 1612         }
 1613         return (1);
 1614 }
 1615 
 1616 /*
 1617  * The system call must not come from a writable entry; otherwise W^X is
 1618  * violated. (It would be nice if we could also spot aliasing, which is
 1619  * just as bad.)
 1620  * The system call must come from a syscall-labeled entry (these are
 1621  * the text regions of the main program, sigtramp, ld.so, or libc).
 1622  */
 1623 int
 1624 uvm_map_inentry_pc(vm_map_entry_t entry)
 1625 {
 1626         if (entry->protection & PROT_WRITE)
 1627                 return (0);     /* not permitted */
 1628         if ((entry->etype & UVM_ET_SYSCALL) == 0)
 1629                 return (0);     /* not permitted */
 1630         return (1);
 1631 }
 1632 
 1633 int
 1634 uvm_map_inentry_recheck(u_long serial, vaddr_t addr, struct p_inentry *ie)
 1635 {
 1636         return (serial != ie->ie_serial || ie->ie_start == 0 ||
 1637             addr < ie->ie_start || addr >= ie->ie_end);
 1638 }
 1639 
 1640 /*
 1641  * Inside a vm_map, find the entry containing the address and verify it
 1642  * via the given function. Remember the low and high addresses of the
 1643  * region if it is valid and return TRUE, else return FALSE.
 1644  */
 1645 boolean_t
 1646 uvm_map_inentry_fix(struct proc *p, struct p_inentry *ie, vaddr_t addr,
 1647     int (*fn)(vm_map_entry_t), u_long serial)
 1648 {
 1649         vm_map_t map = &p->p_vmspace->vm_map;
 1650         vm_map_entry_t entry;
 1651         int ret;
 1652 
 1653         if (addr < map->min_offset || addr >= map->max_offset)
 1654                 return (FALSE);
 1655 
 1656         /* lock map */
 1657         vm_map_lock_read(map);
 1658 
 1659         /* lookup */
 1660         if (!uvm_map_lookup_entry(map, trunc_page(addr), &entry)) {
 1661                 vm_map_unlock_read(map);
 1662                 return (FALSE);
 1663         }
 1664 
 1665         ret = (*fn)(entry);
 1666         if (ret == 0) {
 1667                 vm_map_unlock_read(map);
 1668                 return (FALSE);
 1669         } else if (ret == 1) {
 1670                 ie->ie_start = entry->start;
 1671                 ie->ie_end = entry->end;
 1672                 ie->ie_serial = serial;
 1673         } else {
 1674                 /* do not update, re-check later */
 1675         }
 1676         vm_map_unlock_read(map);
 1677         return (TRUE);
 1678 }
 1679 
 1680 boolean_t
 1681 uvm_map_inentry(struct proc *p, struct p_inentry *ie, vaddr_t addr,
 1682     const char *fmt, int (*fn)(vm_map_entry_t), u_long serial)
 1683 {
 1684         union sigval sv;
 1685         boolean_t ok = TRUE;
 1686 
 1687         if (uvm_map_inentry_recheck(serial, addr, ie)) {
 1688                 ok = uvm_map_inentry_fix(p, ie, addr, fn, serial);
 1689                 if (!ok) {
 1690                         KERNEL_LOCK();
 1691                         printf(fmt, p->p_p->ps_comm, p->p_p->ps_pid, p->p_tid,
 1692                             addr, ie->ie_start, ie->ie_end-1);
 1693                         p->p_p->ps_acflag |= AMAP;
 1694                         sv.sival_ptr = (void *)PROC_PC(p);
 1695                         trapsignal(p, SIGSEGV, 0, SEGV_ACCERR, sv);
 1696                         KERNEL_UNLOCK();
 1697                 }
 1698         }
 1699         return (ok);
 1700 }
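/*
 * Usage sketch (not part of this file): machine-dependent trap and
 * syscall return paths typically re-validate the stack pointer with
 * uvm_map_inentry().  The p_spinentry field and the PROC_STACK() macro
 * below are assumptions about that MD glue; the sketch only illustrates
 * the calling convention of uvm_map_inentry().
 */
#if 0
static void
example_check_sp(struct proc *p)
{
	if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
	    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
	    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
		return;		/* SIGSEGV was already delivered */
}
#endif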
 1701 
 1702 /*
 1703  * Check whether the given address range can be converted to a MAP_STACK
 1704  * mapping.
 1705  *
 1706  * Must be called with map locked.
 1707  */
 1708 boolean_t
 1709 uvm_map_is_stack_remappable(struct vm_map *map, vaddr_t addr, vaddr_t sz,
 1710     int sigaltstack_check)
 1711 {
 1712         vaddr_t end = addr + sz;
 1713         struct vm_map_entry *first, *iter, *prev = NULL;
 1714 
 1715         vm_map_assert_anylock(map);
 1716 
 1717         if (!uvm_map_lookup_entry(map, addr, &first)) {
 1718                 printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n",
 1719                     addr, end, map);
 1720                 return FALSE;
 1721         }
 1722 
 1723         /*
 1724          * Check that the address range exists and is contiguous.
 1725          */
 1726         for (iter = first; iter != NULL && iter->start < end;
 1727             prev = iter, iter = RBT_NEXT(uvm_map_addr, iter)) {
 1728                 /*
 1729                  * Make sure that we do not have holes in the range.
 1730                  */
 1731 #if 0
 1732                 if (prev != NULL) {
 1733                         printf("prev->start 0x%lx, prev->end 0x%lx, "
 1734                             "iter->start 0x%lx, iter->end 0x%lx\n",
 1735                             prev->start, prev->end, iter->start, iter->end);
 1736                 }
 1737 #endif
 1738 
 1739                 if (prev != NULL && prev->end != iter->start) {
 1740                         printf("map stack 0x%lx-0x%lx of map %p failed: "
 1741                             "hole in range\n", addr, end, map);
 1742                         return FALSE;
 1743                 }
 1744                 if (iter->start == iter->end || UVM_ET_ISHOLE(iter)) {
 1745                         printf("map stack 0x%lx-0x%lx of map %p failed: "
 1746                             "hole in range\n", addr, end, map);
 1747                         return FALSE;
 1748                 }
 1749                 if (sigaltstack_check) {
 1750                         if ((iter->etype & UVM_ET_SYSCALL))
 1751                                 return FALSE;
 1752                         if (iter->protection != (PROT_READ | PROT_WRITE))
 1753                                 return FALSE;
 1754                 }
 1755         }
 1756 
 1757         return TRUE;
 1758 }
 1759 
 1760 /*
 1761  * Remap the middle-pages of an existing mapping as a stack range.
 1762  * If there exists a previous contiguous mapping with the given range
 1763  * [addr, addr + sz), with protection PROT_READ|PROT_WRITE, then the
 1764  * mapping is dropped, and a new anon mapping is created and marked as
 1765  * a stack.
 1766  *
 1767  * Must be called with map unlocked.
 1768  */
 1769 int
 1770 uvm_map_remap_as_stack(struct proc *p, vaddr_t addr, vaddr_t sz)
 1771 {
 1772         vm_map_t map = &p->p_vmspace->vm_map;
 1773         vaddr_t start, end;
 1774         int error;
 1775         int flags = UVM_MAPFLAG(PROT_READ | PROT_WRITE,
 1776             PROT_READ | PROT_WRITE | PROT_EXEC,
 1777             MAP_INHERIT_COPY, MADV_NORMAL,
 1778             UVM_FLAG_STACK | UVM_FLAG_FIXED | UVM_FLAG_UNMAP |
 1779             UVM_FLAG_COPYONW | UVM_FLAG_SIGALTSTACK);
 1780 
 1781         start = round_page(addr);
 1782         end = trunc_page(addr + sz);
 1783 #ifdef MACHINE_STACK_GROWS_UP
 1784         if (end == addr + sz)
 1785                 end -= PAGE_SIZE;
 1786 #else
 1787         if (start == addr)
 1788                 start += PAGE_SIZE;
 1789 #endif
 1790 
 1791         if (start < map->min_offset || end >= map->max_offset || end < start)
 1792                 return EINVAL;
 1793 
 1794         /*
 1795          * UVM_FLAG_SIGALTSTACK indicates that the immutable flag may be
 1796          * bypassed, but the range is checked to be contiguous, not a syscall
 1797          * mapping, and RW protected.  Then, a new (all zero) mapping is
 1798          * placed upon the region, which prevents an attacker from pivoting
 1799          * into pre-placed MAP_STACK space.
 1800          */
 1801         error = uvm_mapanon(map, &start, end - start, 0, flags);
 1802         if (error != 0)
 1803                 printf("map stack for pid %d failed\n", p->p_p->ps_pid);
 1804 
 1805         return error;
 1806 }
 1807 
 1808 /*
 1809  * uvm_map_pie: return a properly aligned random load address for a
 1810  * PIE executable.
 1811  */
 1812 #ifndef VM_PIE_MAX_ADDR
 1813 #define VM_PIE_MAX_ADDR (VM_MAXUSER_ADDRESS / 4)
 1814 #endif
 1815 
 1816 #ifndef VM_PIE_MIN_ADDR
 1817 #define VM_PIE_MIN_ADDR VM_MIN_ADDRESS
 1818 #endif
 1819 
 1820 #ifndef VM_PIE_MIN_ALIGN
 1821 #define VM_PIE_MIN_ALIGN PAGE_SIZE
 1822 #endif
 1823 
 1824 vaddr_t
 1825 uvm_map_pie(vaddr_t align)
 1826 {
 1827         vaddr_t addr, space, min;
 1828 
 1829         align = MAX(align, VM_PIE_MIN_ALIGN);
 1830 
 1831         /* round up to next alignment */
 1832         min = (VM_PIE_MIN_ADDR + align - 1) & ~(align - 1);
 1833 
 1834         if (align >= VM_PIE_MAX_ADDR || min >= VM_PIE_MAX_ADDR)
 1835                 return (align);
 1836 
 1837         space = (VM_PIE_MAX_ADDR - min) / align;
 1838         space = MIN(space, (u_int32_t)-1);
 1839 
 1840         addr = (vaddr_t)arc4random_uniform((u_int32_t)space) * align;
 1841         addr += min;
 1842 
 1843         return (addr);
 1844 }
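/*
 * Worked example (hypothetical values): with align = 0x100000 (1 MB),
 * VM_PIE_MIN_ADDR = 0x1000 and VM_PIE_MAX_ADDR = 0x40000000, the code
 * above computes
 *
 *   min   = roundup(0x1000, 0x100000)           = 0x100000
 *   space = (0x40000000 - 0x100000) / 0x100000  = 1023 slots
 *   addr  = arc4random_uniform(1023) * 0x100000 + 0x100000
 *
 * i.e. a uniformly chosen, 1 MB-aligned load address in the range
 * [0x100000, 0x40000000).
 */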
 1845 
 1846 void
 1847 uvm_unmap(struct vm_map *map, vaddr_t start, vaddr_t end)
 1848 {
 1849         struct uvm_map_deadq dead;
 1850 
 1851         KASSERT((start & (vaddr_t)PAGE_MASK) == 0 &&
 1852             (end & (vaddr_t)PAGE_MASK) == 0);
 1853         TAILQ_INIT(&dead);
 1854         vm_map_lock(map);
 1855         uvm_unmap_remove(map, start, end, &dead, FALSE, TRUE, FALSE);
 1856         vm_map_unlock(map);
 1857 
 1858         if (map->flags & VM_MAP_INTRSAFE)
 1859                 uvm_unmap_detach_intrsafe(&dead);
 1860         else
 1861                 uvm_unmap_detach(&dead, 0);
 1862 }
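/*
 * Usage sketch (hypothetical helper and 'kva'): callers that just want a
 * range gone go through the wrapper above; the dead-entry queue handling
 * and the choice between the intrsafe and regular detach paths are
 * internal to it.
 */
#if 0
static void
example_unmap_page(vaddr_t kva)
{
	/* 'kva' must be page-aligned and previously mapped. */
	uvm_unmap(kernel_map, kva, kva + PAGE_SIZE);
}
#endif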
 1863 
 1864 /*
 1865  * Mark entry as free.
 1866  *
 1867  * entry will be put on the dead list.
 1868  * The free space will be merged into the previous or a new entry,
 1869  * unless markfree is false.
 1870  */
 1871 void
 1872 uvm_mapent_mkfree(struct vm_map *map, struct vm_map_entry *entry,
 1873     struct vm_map_entry **prev_ptr, struct uvm_map_deadq *dead,
 1874     boolean_t markfree)
 1875 {
 1876         struct uvm_addr_state   *free;
 1877         struct vm_map_entry     *prev;
 1878         vaddr_t                  addr;  /* Start of freed range. */
 1879         vaddr_t                  end;   /* End of freed range. */
 1880 
 1881         UVM_MAP_REQ_WRITE(map);
 1882 
 1883         prev = *prev_ptr;
 1884         if (prev == entry)
 1885                 *prev_ptr = prev = NULL;
 1886 
 1887         if (prev == NULL ||
 1888             VMMAP_FREE_END(prev) != entry->start)
 1889                 prev = RBT_PREV(uvm_map_addr, entry);
 1890 
 1891         /* Entry describes only free memory and has nothing to drain into. */
 1892         if (prev == NULL && entry->start == entry->end && markfree) {
 1893                 *prev_ptr = entry;
 1894                 return;
 1895         }
 1896 
 1897         addr = entry->start;
 1898         end = VMMAP_FREE_END(entry);
 1899         free = uvm_map_uaddr_e(map, entry);
 1900         uvm_mapent_free_remove(map, free, entry);
 1901         uvm_mapent_addr_remove(map, entry);
 1902         DEAD_ENTRY_PUSH(dead, entry);
 1903 
 1904         if (markfree) {
 1905                 if (prev) {
 1906                         free = uvm_map_uaddr_e(map, prev);
 1907                         uvm_mapent_free_remove(map, free, prev);
 1908                 }
 1909                 *prev_ptr = uvm_map_fix_space(map, prev, addr, end, 0);
 1910         }
 1911 }
 1912 
 1913 /*
 1914  * Unwire and release referenced amap and object from map entry.
 1915  */
 1916 void
 1917 uvm_unmap_kill_entry_withlock(struct vm_map *map, struct vm_map_entry *entry,
 1918     int needlock)
 1919 {
 1920         /* Unwire removed map entry. */
 1921         if (VM_MAPENT_ISWIRED(entry)) {
 1922                 KERNEL_LOCK();
 1923                 entry->wired_count = 0;
 1924                 uvm_fault_unwire_locked(map, entry->start, entry->end);
 1925                 KERNEL_UNLOCK();
 1926         }
 1927 
 1928         if (needlock)
 1929                 uvm_map_lock_entry(entry);
 1930 
 1931         /* Entry-type specific code. */
 1932         if (UVM_ET_ISHOLE(entry)) {
 1933                 /* Nothing to be done for holes. */
 1934         } else if (map->flags & VM_MAP_INTRSAFE) {
 1935                 KASSERT(vm_map_pmap(map) == pmap_kernel());
 1936 
 1937                 uvm_km_pgremove_intrsafe(entry->start, entry->end);
 1938         } else if (UVM_ET_ISOBJ(entry) &&
 1939             UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
 1940                 KASSERT(vm_map_pmap(map) == pmap_kernel());
 1941                 /*
 1942                  * Note: kernel object mappings are currently used in
 1943                  * two ways:
 1944                  *  [1] "normal" mappings of pages in the kernel object
 1945                  *  [2] uvm_km_valloc'd allocations in which we
 1946                  *      pmap_enter in some non-kernel-object page
 1947                  *      (e.g. vmapbuf).
 1948                  *
 1949                  * for case [1], we need to remove the mapping from
 1950                  * the pmap and then remove the page from the kernel
 1951                  * object (because, once pages in a kernel object are
 1952                  * unmapped they are no longer needed, unlike, say,
 1953                  * a vnode where you might want the data to persist
 1954                  * until flushed out of a queue).
 1955                  *
 1956                  * for case [2], we need to remove the mapping from
 1957                  * the pmap.  there shouldn't be any pages at the
 1958                  * specified offset in the kernel object [but it
 1959                  * doesn't hurt to call uvm_km_pgremove just to be
 1960                  * safe?]
 1961                  *
 1962                  * uvm_km_pgremove currently does the following:
 1963                  *   for pages in the kernel object range:
 1964                  *     - drops the swap slot
 1965                  *     - uvm_pagefree the page
 1966                  *
 1967                  * note there is a version of uvm_km_pgremove() that
 1968                  * is used for "intrsafe" objects.
 1969                  */
 1970                 /*
 1971                  * remove mappings from pmap and drop the pages
 1972                  * from the object.  offsets are always relative
 1973                  * to vm_map_min(kernel_map).
 1974                  */
 1975                 uvm_km_pgremove(entry->object.uvm_obj, entry->start,
 1976                     entry->end);
 1977         } else {
 1978                 /* remove mappings the standard way. */
 1979                 pmap_remove(map->pmap, entry->start, entry->end);
 1980         }
 1981 
 1982         if (needlock)
 1983                 uvm_map_unlock_entry(entry);
 1984 }
 1985 
 1986 void
 1987 uvm_unmap_kill_entry(struct vm_map *map, struct vm_map_entry *entry)
 1988 {
 1989         uvm_unmap_kill_entry_withlock(map, entry, 0);
 1990 }
 1991 
 1992 /*
 1993  * Remove all entries from start to end.
 1994  *
 1995  * If remove_holes, then remove ET_HOLE entries as well.
 1996  * If markfree, each entry will be properly marked free; otherwise, no
 1997  * replacement entry will be put in the tree (leaving the tree corrupted).
 1998  */
 1999 int
 2000 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
 2001     struct uvm_map_deadq *dead, boolean_t remove_holes,
 2002     boolean_t markfree, boolean_t checkimmutable)
 2003 {
 2004         struct vm_map_entry *prev_hint, *next, *entry;
 2005 
 2006         start = MAX(start, map->min_offset);
 2007         end = MIN(end, map->max_offset);
 2008         if (start >= end)
 2009                 return 0;
 2010 
 2011         vm_map_assert_wrlock(map);
 2012 
 2013         /* Find first affected entry. */
 2014         entry = uvm_map_entrybyaddr(&map->addr, start);
 2015         KDASSERT(entry != NULL && entry->start <= start);
 2016 
 2017         if (checkimmutable) {
 2018                 struct vm_map_entry *entry1 = entry;
 2019 
 2020                 /* Refuse to unmap if any entries are immutable */
 2021                 if (entry1->end <= start)
 2022                         entry1 = RBT_NEXT(uvm_map_addr, entry1);
 2023                 for (; entry1 != NULL && entry1->start < end; entry1 = next) {
 2024                         KDASSERT(entry1->start >= start);
 2025                         next = RBT_NEXT(uvm_map_addr, entry1);
 2026                         /* Treat memory holes as free space. */
 2027                         if (entry1->start == entry1->end || UVM_ET_ISHOLE(entry1))
 2028                                 continue;
 2029                         if (entry1->etype & UVM_ET_IMMUTABLE)
 2030                                 return EPERM;
 2031                 }
 2032         }
 2033 
 2034         if (entry->end <= start && markfree)
 2035                 entry = RBT_NEXT(uvm_map_addr, entry);
 2036         else
 2037                 UVM_MAP_CLIP_START(map, entry, start);
 2038 
 2039         /*
 2040          * Iterate entries until we reach end address.
 2041          * prev_hint hints where the freed space can be appended to.
 2042          */
 2043         prev_hint = NULL;
 2044         for (; entry != NULL && entry->start < end; entry = next) {
 2045                 KDASSERT(entry->start >= start);
 2046                 if (entry->end > end || !markfree)
 2047                         UVM_MAP_CLIP_END(map, entry, end);
 2048                 KDASSERT(entry->start >= start && entry->end <= end);
 2049                 next = RBT_NEXT(uvm_map_addr, entry);
 2050 
 2051                 /* Don't remove holes unless asked to do so. */
 2052                 if (UVM_ET_ISHOLE(entry)) {
 2053                         if (!remove_holes) {
 2054                                 prev_hint = entry;
 2055                                 continue;
 2056                         }
 2057                 }
 2058 
 2059                 /* A stack has been removed. */
 2060                 if (UVM_ET_ISSTACK(entry) && (map->flags & VM_MAP_ISVMSPACE))
 2061                         map->sserial++;
 2062 
 2063                 /* Kill entry. */
 2064                 uvm_unmap_kill_entry_withlock(map, entry, 1);
 2065 
 2066                 /* Update space usage. */
 2067                 if ((map->flags & VM_MAP_ISVMSPACE) &&
 2068                     entry->object.uvm_obj == NULL &&
 2069                     entry->protection != PROT_NONE &&
 2070                     !UVM_ET_ISHOLE(entry)) {
 2071                         ((struct vmspace *)map)->vm_dused -=
 2072                             uvmspace_dused(map, entry->start, entry->end);
 2073                 }
 2074                 if (!UVM_ET_ISHOLE(entry))
 2075                         map->size -= entry->end - entry->start;
 2076 
 2077                 /* Actual removal of entry. */
 2078                 uvm_mapent_mkfree(map, entry, &prev_hint, dead, markfree);
 2079         }
 2080 
 2081         pmap_update(vm_map_pmap(map));
 2082 
 2083 #ifdef VMMAP_DEBUG
 2084         if (markfree) {
 2085                 for (entry = uvm_map_entrybyaddr(&map->addr, start);
 2086                     entry != NULL && entry->start < end;
 2087                     entry = RBT_NEXT(uvm_map_addr, entry)) {
 2088                         KDASSERT(entry->end <= start ||
 2089                             entry->start == entry->end ||
 2090                             UVM_ET_ISHOLE(entry));
 2091                 }
 2092         } else {
 2093                 vaddr_t a;
 2094                 for (a = start; a < end; a += PAGE_SIZE)
 2095                         KDASSERT(uvm_map_entrybyaddr(&map->addr, a) == NULL);
 2096         }
 2097 #endif
 2098         return 0;
 2099 }
 2100 
 2101 /*
 2102  * Mark all entries from first until end (exclusive) as pageable.
 2103  *
 2104  * Lock must be exclusive on entry and will not be touched.
 2105  */
 2106 void
 2107 uvm_map_pageable_pgon(struct vm_map *map, struct vm_map_entry *first,
 2108     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr)
 2109 {
 2110         struct vm_map_entry *iter;
 2111 
 2112         for (iter = first; iter != end;
 2113             iter = RBT_NEXT(uvm_map_addr, iter)) {
 2114                 KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
 2115                 if (!VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
 2116                         continue;
 2117 
 2118                 iter->wired_count = 0;
 2119                 uvm_fault_unwire_locked(map, iter->start, iter->end);
 2120         }
 2121 }
 2122 
 2123 /*
 2124  * Mark all entries from first until end (exclusive) as wired.
 2125  *
 2126  * Lockflags determines the lock state on return from this function.
 2127  * Lock must be exclusive on entry.
 2128  */
 2129 int
 2130 uvm_map_pageable_wire(struct vm_map *map, struct vm_map_entry *first,
 2131     struct vm_map_entry *end, vaddr_t start_addr, vaddr_t end_addr,
 2132     int lockflags)
 2133 {
 2134         struct vm_map_entry *iter;
 2135 #ifdef DIAGNOSTIC
 2136         unsigned int timestamp_save;
 2137 #endif
 2138         int error;
 2139 
 2140         /*
 2141          * Wire pages in two passes:
 2142          *
 2143          * 1: holding the write lock, we create any anonymous maps that need
 2144          *    to be created.  then we clip each map entry to the region to
 2145          *    be wired and increment its wiring count.
 2146          *
 2147          * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
 2148          *    in the pages for any newly wired area (wired_count == 1).
 2149          *
 2150          *    downgrading to a read lock for uvm_fault_wire avoids a possible
 2151          *    deadlock with another thread that may have faulted on one of
 2152          *    the pages to be wired (it would mark the page busy, blocking
 2153          *    us, then in turn block on the map lock that we hold).
 2154          *    because we keep the read lock on the map, the copy-on-write
 2155          *    status of the entries we modify here cannot change.
 2156          */
 2157         for (iter = first; iter != end;
 2158             iter = RBT_NEXT(uvm_map_addr, iter)) {
 2159                 KDASSERT(iter->start >= start_addr && iter->end <= end_addr);
 2160                 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
 2161                     iter->protection == PROT_NONE)
 2162                         continue;
 2163 
 2164                 /*
 2165                  * Perform actions of vm_map_lookup that need the write lock.
 2166                  * - create an anonymous map for copy-on-write
 2167                  * - anonymous map for zero-fill
 2168                  * Skip submaps.
 2169                  */
 2170                 if (!VM_MAPENT_ISWIRED(iter) && !UVM_ET_ISSUBMAP(iter) &&
 2171                     UVM_ET_ISNEEDSCOPY(iter) &&
 2172                     ((iter->protection & PROT_WRITE) ||
 2173                     iter->object.uvm_obj == NULL)) {
 2174                         amap_copy(map, iter, M_WAITOK,
 2175                             UVM_ET_ISSTACK(iter) ? FALSE : TRUE,
 2176                             iter->start, iter->end);
 2177                 }
 2178                 iter->wired_count++;
 2179         }
 2180 
 2181         /*
 2182          * Pass 2.
 2183          */
 2184 #ifdef DIAGNOSTIC
 2185         timestamp_save = map->timestamp;
 2186 #endif
 2187         vm_map_busy(map);
 2188         vm_map_downgrade(map);
 2189 
 2190         error = 0;
 2191         for (iter = first; error == 0 && iter != end;
 2192             iter = RBT_NEXT(uvm_map_addr, iter)) {
 2193                 if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
 2194                     iter->protection == PROT_NONE)
 2195                         continue;
 2196 
 2197                 error = uvm_fault_wire(map, iter->start, iter->end,
 2198                     iter->protection);
 2199         }
 2200 
 2201         if (error) {
 2202                 /*
 2203                  * uvm_fault_wire failure
 2204                  *
 2205                  * Reacquire lock and undo our work.
 2206                  */
 2207                 vm_map_upgrade(map);
 2208                 vm_map_unbusy(map);
 2209 #ifdef DIAGNOSTIC
 2210                 if (timestamp_save != map->timestamp)
 2211                         panic("uvm_map_pageable_wire: stale map");
 2212 #endif
 2213 
 2214                 /*
 2215                  * first is no longer needed to restart loops.
 2216                  * Use it as iterator to unmap successful mappings.
 2217                  */
 2218                 for (; first != iter;
 2219                     first = RBT_NEXT(uvm_map_addr, first)) {
 2220                         if (UVM_ET_ISHOLE(first) ||
 2221                             first->start == first->end ||
 2222                             first->protection == PROT_NONE)
 2223                                 continue;
 2224 
 2225                         first->wired_count--;
 2226                         if (!VM_MAPENT_ISWIRED(first)) {
 2227                                 uvm_fault_unwire_locked(map,
 2228                                     first->start, first->end);
 2229                         }
 2230                 }
 2231 
 2232                 /* decrease counter in the rest of the entries */
 2233                 for (; iter != end;
 2234                     iter = RBT_NEXT(uvm_map_addr, iter)) {
 2235                         if (UVM_ET_ISHOLE(iter) || iter->start == iter->end ||
 2236                             iter->protection == PROT_NONE)
 2237                                 continue;
 2238 
 2239                         iter->wired_count--;
 2240                 }
 2241 
 2242                 if ((lockflags & UVM_LK_EXIT) == 0)
 2243                         vm_map_unlock(map);
 2244                 return error;
 2245         }
 2246 
 2247         /* We are currently holding a read lock. */
 2248         if ((lockflags & UVM_LK_EXIT) == 0) {
 2249                 vm_map_unbusy(map);
 2250                 vm_map_unlock_read(map);
 2251         } else {
 2252                 vm_map_upgrade(map);
 2253                 vm_map_unbusy(map);
 2254 #ifdef DIAGNOSTIC
 2255                 if (timestamp_save != map->timestamp)
 2256                         panic("uvm_map_pageable_wire: stale map");
 2257 #endif
 2258         }
 2259         return 0;
 2260 }
 2261 
 2262 /*
 2263  * uvm_map_pageable: set pageability of a range in a map.
 2264  *
 2265  * Flags:
 2266  * UVM_LK_ENTER: map is already locked by caller
 2267  * UVM_LK_EXIT:  don't unlock map on exit
 2268  *
 2269  * The full range must be in use (entries may not have fspace != 0).
 2270  * UVM_ET_HOLE counts as unmapped.
 2271  */
 2272 int
 2273 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
 2274     boolean_t new_pageable, int lockflags)
 2275 {
 2276         struct vm_map_entry *first, *last, *tmp;
 2277         int error;
 2278 
 2279         start = trunc_page(start);
 2280         end = round_page(end);
 2281 
 2282         if (start > end)
 2283                 return EINVAL;
 2284         if (start == end)
 2285                 return 0;       /* nothing to do */
 2286         if (start < map->min_offset)
 2287                 return EFAULT; /* why? see first XXX below */
 2288         if (end > map->max_offset)
 2289                 return EINVAL; /* why? see second XXX below */
 2290 
 2291         KASSERT(map->flags & VM_MAP_PAGEABLE);
 2292         if ((lockflags & UVM_LK_ENTER) == 0)
 2293                 vm_map_lock(map);
 2294 
 2295         /*
 2296          * Find first entry.
 2297          *
 2298          * Initial test on start is different, because of the different
 2299          * error returned. Rest is tested further down.
 2300          */
 2301         first = uvm_map_entrybyaddr(&map->addr, start);
 2302         if (first->end <= start || UVM_ET_ISHOLE(first)) {
 2303                 /*
 2304                  * XXX if the first address is not mapped, it is EFAULT?
 2305                  */
 2306                 error = EFAULT;
 2307                 goto out;
 2308         }
 2309 
 2310         /* Check that the range has no holes. */
 2311         for (last = first; last != NULL && last->start < end;
 2312             last = RBT_NEXT(uvm_map_addr, last)) {
 2313                 if (UVM_ET_ISHOLE(last) ||
 2314                     (last->end < end && VMMAP_FREE_END(last) != last->end)) {
 2315                         /*
 2316                          * XXX unmapped memory in range, why is it EINVAL
 2317                          * instead of EFAULT?
 2318                          */
 2319                         error = EINVAL;
 2320                         goto out;
 2321                 }
 2322         }
 2323 
 2324         /*
 2325          * Last ended at the first entry after the range.
 2326          * Move back one step.
 2327          *
 2328          * Note that last may be NULL.
 2329          */
 2330         if (last == NULL) {
 2331                 last = RBT_MAX(uvm_map_addr, &map->addr);
 2332                 if (last->end < end) {
 2333                         error = EINVAL;
 2334                         goto out;
 2335                 }
 2336         } else {
 2337                 KASSERT(last != first);
 2338                 last = RBT_PREV(uvm_map_addr, last);
 2339         }
 2340 
 2341         /* Wire/unwire pages here. */
 2342         if (new_pageable) {
 2343                 /*
 2344                  * Mark pageable.
 2345                  * entries that are not wired are untouched.
 2346                  */
 2347                 if (VM_MAPENT_ISWIRED(first))
 2348                         UVM_MAP_CLIP_START(map, first, start);
 2349                 /*
 2350                  * Split last at end.
 2351                  * Make tmp be the first entry after what is to be touched.
 2352                  * If last is not wired, don't touch it.
 2353                  */
 2354                 if (VM_MAPENT_ISWIRED(last)) {
 2355                         UVM_MAP_CLIP_END(map, last, end);
 2356                         tmp = RBT_NEXT(uvm_map_addr, last);
 2357                 } else
 2358                         tmp = last;
 2359 
 2360                 uvm_map_pageable_pgon(map, first, tmp, start, end);
 2361                 error = 0;
 2362 
 2363 out:
 2364                 if ((lockflags & UVM_LK_EXIT) == 0)
 2365                         vm_map_unlock(map);
 2366                 return error;
 2367         } else {
 2368                 /*
 2369                  * Mark entries wired.
 2370                  * entries are always touched (because recovery needs this).
 2371                  */
 2372                 if (!VM_MAPENT_ISWIRED(first))
 2373                         UVM_MAP_CLIP_START(map, first, start);
 2374                 /*
 2375                  * Split last at end.
 2376                  * Make tmp be the first entry after what is to be touched.
 2377                  * If last is not wired, don't touch it.
 2378                  */
 2379                 if (!VM_MAPENT_ISWIRED(last)) {
 2380                         UVM_MAP_CLIP_END(map, last, end);
 2381                         tmp = RBT_NEXT(uvm_map_addr, last);
 2382                 } else
 2383                         tmp = last;
 2384 
 2385                 return uvm_map_pageable_wire(map, first, tmp, start, end,
 2386                     lockflags);
 2387         }
 2388 }
 2389 
 2390 /*
 2391  * uvm_map_pageable_all: special case of uvm_map_pageable - affects
 2392  * all mapped regions.
 2393  *
 2394  * Map must not be locked.
 2395  * If no flags are specified, all regions are unwired.
 2396  */
 2397 int
 2398 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
 2399 {
 2400         vsize_t size;
 2401         struct vm_map_entry *iter;
 2402 
 2403         KASSERT(map->flags & VM_MAP_PAGEABLE);
 2404         vm_map_lock(map);
 2405 
 2406         if (flags == 0) {
 2407                 uvm_map_pageable_pgon(map, RBT_MIN(uvm_map_addr, &map->addr),
 2408                     NULL, map->min_offset, map->max_offset);
 2409 
 2410                 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
 2411                 vm_map_unlock(map);
 2412                 return 0;
 2413         }
 2414 
 2415         if (flags & MCL_FUTURE)
 2416                 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0);
 2417         if (!(flags & MCL_CURRENT)) {
 2418                 vm_map_unlock(map);
 2419                 return 0;
 2420         }
 2421 
 2422         /*
 2423          * Count number of pages in all non-wired entries.
 2424          * If the number exceeds the limit, abort.
 2425          */
 2426         size = 0;
 2427         RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
 2428                 if (VM_MAPENT_ISWIRED(iter) || UVM_ET_ISHOLE(iter))
 2429                         continue;
 2430 
 2431                 size += iter->end - iter->start;
 2432         }
 2433 
 2434         if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
 2435                 vm_map_unlock(map);
 2436                 return ENOMEM;
 2437         }
 2438 
 2439         /* XXX non-pmap_wired_count case must be handled by caller */
 2440 #ifdef pmap_wired_count
 2441         if (limit != 0 &&
 2442             size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit) {
 2443                 vm_map_unlock(map);
 2444                 return ENOMEM;
 2445         }
 2446 #endif
 2447 
 2448         /*
 2449          * uvm_map_pageable_wire will release lock
 2450          */
 2451         return uvm_map_pageable_wire(map, RBT_MIN(uvm_map_addr, &map->addr),
 2452             NULL, map->min_offset, map->max_offset, 0);
 2453 }
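/*
 * Usage sketch: mlockall(2)-style wiring of an entire address space.
 * The 'memlock_limit' parameter stands in for the caller's RLIMIT_MEMLOCK
 * value and is an assumption of this sketch; the point is the flag
 * handling above (MCL_CURRENT wires what is mapped now, MCL_FUTURE only
 * sets VM_MAP_WIREFUTURE for mappings made later).
 */
#if 0
static int
example_mlockall(struct proc *p, vsize_t memlock_limit)
{
	return uvm_map_pageable_all(&p->p_vmspace->vm_map,
	    MCL_CURRENT | MCL_FUTURE, memlock_limit);
}
#endif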
 2454 
 2455 /*
 2456  * Initialize map.
 2457  *
 2458  * Allocates sufficient entries to describe the free memory in the map.
 2459  */
 2460 void
 2461 uvm_map_setup(struct vm_map *map, pmap_t pmap, vaddr_t min, vaddr_t max,
 2462     int flags)
 2463 {
 2464         int i;
 2465 
 2466         KASSERT((min & (vaddr_t)PAGE_MASK) == 0);
 2467         KASSERT((max & (vaddr_t)PAGE_MASK) == 0 ||
 2468             (max & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
 2469 
 2470         /*
 2471          * Update parameters.
 2472          *
 2473          * This code properly handles (vaddr_t)-1 and other addresses
 2474          * whose low bits equal the page mask.
 2475          * We lose the top page if the full virtual address space is used.
 2476          */
 2477         if (max & (vaddr_t)PAGE_MASK) {
 2478                 max += 1;
 2479                 if (max == 0) /* overflow */
 2480                         max -= PAGE_SIZE;
 2481         }
 2482 
 2483         RBT_INIT(uvm_map_addr, &map->addr);
 2484         map->uaddr_exe = NULL;
 2485         for (i = 0; i < nitems(map->uaddr_any); ++i)
 2486                 map->uaddr_any[i] = NULL;
 2487         map->uaddr_brk_stack = NULL;
 2488 
 2489         map->pmap = pmap;
 2490         map->size = 0;
 2491         map->ref_count = 0;
 2492         map->min_offset = min;
 2493         map->max_offset = max;
 2494         map->b_start = map->b_end = 0; /* Empty brk() area by default. */
 2495         map->s_start = map->s_end = 0; /* Empty stack area by default. */
 2496         map->flags = flags;
 2497         map->timestamp = 0;
 2498         if (flags & VM_MAP_ISVMSPACE)
 2499                 rw_init_flags(&map->lock, "vmmaplk", RWL_DUPOK);
 2500         else
 2501                 rw_init(&map->lock, "kmmaplk");
 2502         mtx_init(&map->mtx, IPL_VM);
 2503         mtx_init(&map->flags_lock, IPL_VM);
 2504 
 2505         /* Configure the allocators. */
 2506         if (flags & VM_MAP_ISVMSPACE)
 2507                 uvm_map_setup_md(map);
 2508         else
 2509                 map->uaddr_any[3] = &uaddr_kbootstrap;
 2510 
 2511         /*
 2512          * Fill map entries.
 2513          * We do not need to write-lock the map here because only the current
 2514          * thread sees it right now. Initialize ref_count to 0 above to avoid
 2515          * bogus triggering of lock-not-held assertions.
 2516          */
 2517         uvm_map_setup_entries(map);
 2518         uvm_tree_sanity(map, __FILE__, __LINE__);
 2519         map->ref_count = 1;
 2520 }
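/*
 * Minimal initialization sketch (assumed bounds and helper): a standalone
 * pageable map covering [va_start, va_end).  Real callers pass their own
 * machine-dependent limits, pmap, and flags.
 */
#if 0
static struct vm_map examplemap;

static void
example_setup(vaddr_t va_start, vaddr_t va_end)
{
	uvm_map_setup(&examplemap, pmap_kernel(),
	    trunc_page(va_start), round_page(va_end), VM_MAP_PAGEABLE);
}
#endif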
 2521 
 2522 /*
 2523  * Destroy the map.
 2524  *
 2525  * This is the inverse operation to uvm_map_setup.
 2526  */
 2527 void
 2528 uvm_map_teardown(struct vm_map *map)
 2529 {
 2530         struct uvm_map_deadq     dead_entries;
 2531         struct vm_map_entry     *entry, *tmp;
 2532 #ifdef VMMAP_DEBUG
 2533         size_t                   numq, numt;
 2534 #endif
 2535         int                      i;
 2536 
 2537         KERNEL_ASSERT_LOCKED();
 2538         KERNEL_UNLOCK();
 2539         KERNEL_ASSERT_UNLOCKED();
 2540 
 2541         KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
 2542 
 2543         vm_map_lock(map);
 2544 
 2545         /* Remove address selectors. */
 2546         uvm_addr_destroy(map->uaddr_exe);
 2547         map->uaddr_exe = NULL;
 2548         for (i = 0; i < nitems(map->uaddr_any); i++) {
 2549                 uvm_addr_destroy(map->uaddr_any[i]);
 2550                 map->uaddr_any[i] = NULL;
 2551         }
 2552         uvm_addr_destroy(map->uaddr_brk_stack);
 2553         map->uaddr_brk_stack = NULL;
 2554 
 2555         /*
 2556          * Remove entries.
 2557          *
 2558          * The following is based on graph breadth-first search.
 2559          *
 2560          * In color terms:
 2561          * - the dead_entries set contains all nodes that are reachable
 2562          *   (i.e. both the black and the grey nodes)
 2563          * - any entry not in dead_entries is white
 2564          * - any entry that appears in dead_entries before entry
 2565          *   is black; the rest are grey.
 2566          * The set [entry, end] is also referred to as the wavefront.
 2567          *
 2568          * Since the tree is always a fully connected graph, the breadth-first
 2569          * search guarantees that each vmmap_entry is visited exactly once.
 2570          * The vm_map is broken down in linear time.
 2571          */
 2572         TAILQ_INIT(&dead_entries);
 2573         if ((entry = RBT_ROOT(uvm_map_addr, &map->addr)) != NULL)
 2574                 DEAD_ENTRY_PUSH(&dead_entries, entry);
 2575         while (entry != NULL) {
 2576                 sched_pause(yield);
 2577                 uvm_unmap_kill_entry(map, entry);
 2578                 if ((tmp = RBT_LEFT(uvm_map_addr, entry)) != NULL)
 2579                         DEAD_ENTRY_PUSH(&dead_entries, tmp);
 2580                 if ((tmp = RBT_RIGHT(uvm_map_addr, entry)) != NULL)
 2581                         DEAD_ENTRY_PUSH(&dead_entries, tmp);
 2582                 /* Update wave-front. */
 2583                 entry = TAILQ_NEXT(entry, dfree.deadq);
 2584         }
 2585 
 2586         vm_map_unlock(map);
 2587 
 2588 #ifdef VMMAP_DEBUG
 2589         numt = numq = 0;
 2590         RBT_FOREACH(entry, uvm_map_addr, &map->addr)
 2591                 numt++;
 2592         TAILQ_FOREACH(entry, &dead_entries, dfree.deadq)
 2593                 numq++;
 2594         KASSERT(numt == numq);
 2595 #endif
 2596         uvm_unmap_detach(&dead_entries, UVM_PLA_WAITOK);
 2597 
 2598         KERNEL_LOCK();
 2599 
 2600         pmap_destroy(map->pmap);
 2601         map->pmap = NULL;
 2602 }
 2603 
 2604 /*
 2605  * Populate map with free-memory entries.
 2606  *
 2607  * Map must be initialized and empty.
 2608  */
 2609 void
 2610 uvm_map_setup_entries(struct vm_map *map)
 2611 {
 2612         KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
 2613 
 2614         uvm_map_fix_space(map, NULL, map->min_offset, map->max_offset, 0);
 2615 }
 2616 
 2617 /*
 2618  * Split entry at given address.
 2619  *
 2620  * orig:  entry that is to be split.
 2621  * next:  a newly allocated map entry that is not linked.
 2622  * split: address at which the split is done.
 2623  */
 2624 void
 2625 uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig,
 2626     struct vm_map_entry *next, vaddr_t split)
 2627 {
 2628         struct uvm_addr_state *free, *free_before;
 2629         vsize_t adj;
 2630 
 2631         if ((split & PAGE_MASK) != 0) {
 2632                 panic("uvm_map_splitentry: split address 0x%lx "
 2633                     "not on page boundary!", split);
 2634         }
 2635         KDASSERT(map != NULL && orig != NULL && next != NULL);
 2636         uvm_tree_sanity(map, __FILE__, __LINE__);
 2637         KASSERT(orig->start < split && VMMAP_FREE_END(orig) > split);
 2638 
 2639 #ifdef VMMAP_DEBUG
 2640         KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, orig) == orig);
 2641         KDASSERT(RBT_FIND(uvm_map_addr, &map->addr, next) != next);
 2642 #endif /* VMMAP_DEBUG */
 2643 
 2644         /*
 2645          * Free space will change, unlink from free space tree.
 2646          */
 2647         free = uvm_map_uaddr_e(map, orig);
 2648         uvm_mapent_free_remove(map, free, orig);
 2649 
 2650         adj = split - orig->start;
 2651 
 2652         uvm_mapent_copy(orig, next);
 2653         if (split >= orig->end) {
 2654                 next->etype = 0;
 2655                 next->offset = 0;
 2656                 next->wired_count = 0;
 2657                 next->start = next->end = split;
 2658                 next->guard = 0;
 2659                 next->fspace = VMMAP_FREE_END(orig) - split;
 2660                 next->aref.ar_amap = NULL;
 2661                 next->aref.ar_pageoff = 0;
 2662                 orig->guard = MIN(orig->guard, split - orig->end);
 2663                 orig->fspace = split - VMMAP_FREE_START(orig);
 2664         } else {
 2665                 orig->fspace = 0;
 2666                 orig->guard = 0;
 2667                 orig->end = next->start = split;
 2668 
 2669                 if (next->aref.ar_amap) {
 2670                         amap_splitref(&orig->aref, &next->aref, adj);
 2671                 }
 2672                 if (UVM_ET_ISSUBMAP(orig)) {
 2673                         uvm_map_reference(next->object.sub_map);
 2674                         next->offset += adj;
 2675                 } else if (UVM_ET_ISOBJ(orig)) {
 2676                         if (next->object.uvm_obj->pgops &&
 2677                             next->object.uvm_obj->pgops->pgo_reference) {
 2678                                 KERNEL_LOCK();
 2679                                 next->object.uvm_obj->pgops->pgo_reference(
 2680                                     next->object.uvm_obj);
 2681                                 KERNEL_UNLOCK();
 2682                         }
 2683                         next->offset += adj;
 2684                 }
 2685         }
 2686 
 2687         /*
 2688          * Link next into address tree.
 2689          * Link orig and next into free-space tree.
 2690          *
 2691          * Don't insert 'next' into the addr tree until orig has been linked,
 2692          * in case the free-list looks at adjacent entries in the addr tree
 2693          * for its decisions.
 2694          */
 2695         if (orig->fspace > 0)
 2696                 free_before = free;
 2697         else
 2698                 free_before = uvm_map_uaddr_e(map, orig);
 2699         uvm_mapent_free_insert(map, free_before, orig);
 2700         uvm_mapent_addr_insert(map, next);
 2701         uvm_mapent_free_insert(map, free, next);
 2702 
 2703         uvm_tree_sanity(map, __FILE__, __LINE__);
 2704 }
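/*
 * Illustrative split (hypothetical addresses): an entry 'orig' mapping
 * [0x1000, 0x5000) with no trailing free space, split at 0x3000, becomes
 *
 *   orig: [0x1000, 0x3000)   offset unchanged
 *   next: [0x3000, 0x5000)   offset advanced by adj = 0x2000
 *
 * with amap and object references adjusted as in the code above.  If the
 * split address lies at or beyond orig->end (i.e. inside orig's free
 * space), 'next' instead becomes an empty entry that only carries the
 * free-space tail.
 */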
 2705 
 2706 
 2707 #ifdef VMMAP_DEBUG
 2708 
 2709 void
 2710 uvm_tree_assert(struct vm_map *map, int test, char *test_str,
 2711     char *file, int line)
 2712 {
 2713         char* map_special;
 2714 
 2715         if (test)
 2716                 return;
 2717 
 2718         if (map == kernel_map)
 2719                 map_special = " (kernel_map)";
 2720         else if (map == kmem_map)
 2721                 map_special = " (kmem_map)";
 2722         else
 2723                 map_special = "";
 2724         panic("uvm_tree_sanity %p%s (%s %d): %s", map, map_special, file,
 2725             line, test_str);
 2726 }
 2727 
 2728 /*
 2729  * Check that map is sane.
 2730  */
 2731 void
 2732 uvm_tree_sanity(struct vm_map *map, char *file, int line)
 2733 {
 2734         struct vm_map_entry     *iter;
 2735         vaddr_t                  addr;
 2736         vaddr_t                  min, max, bound; /* Bounds checker. */
 2737         struct uvm_addr_state   *free;
 2738 
 2739         addr = vm_map_min(map);
 2740         RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
 2741                 /*
 2742                  * Valid start, end.
 2743                  * Catch overflow for end+fspace.
 2744                  */
 2745                 UVM_ASSERT(map, iter->end >= iter->start, file, line);
 2746                 UVM_ASSERT(map, VMMAP_FREE_END(iter) >= iter->end, file, line);
 2747 
 2748                 /* May not be empty. */
 2749                 UVM_ASSERT(map, iter->start < VMMAP_FREE_END(iter),
 2750                     file, line);
 2751 
 2752                 /* Addresses for entry must lie within map boundaries. */
 2753                 UVM_ASSERT(map, iter->start >= vm_map_min(map) &&
 2754                     VMMAP_FREE_END(iter) <= vm_map_max(map), file, line);
 2755 
 2756                 /* Tree may not have gaps. */
 2757                 UVM_ASSERT(map, iter->start == addr, file, line);
 2758                 addr = VMMAP_FREE_END(iter);
 2759 
 2760                 /*
 2761                  * Free space may not cross boundaries, unless the same
 2762                  * free list is used on both sides of the border.
 2763                  */
 2764                 min = VMMAP_FREE_START(iter);
 2765                 max = VMMAP_FREE_END(iter);
 2766 
 2767                 while (min < max &&
 2768                     (bound = uvm_map_boundary(map, min, max)) != max) {
 2769                         UVM_ASSERT(map,
 2770                             uvm_map_uaddr(map, bound - 1) ==
 2771                             uvm_map_uaddr(map, bound),
 2772                             file, line);
 2773                         min = bound;
 2774                 }
 2775 
 2776                 free = uvm_map_uaddr_e(map, iter);
 2777                 if (free) {
 2778                         UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) != 0,
 2779                             file, line);
 2780                 } else {
 2781                         UVM_ASSERT(map, (iter->etype & UVM_ET_FREEMAPPED) == 0,
 2782                             file, line);
 2783                 }
 2784         }
 2785         UVM_ASSERT(map, addr == vm_map_max(map), file, line);
 2786 }
 2787 
 2788 void
 2789 uvm_tree_size_chk(struct vm_map *map, char *file, int line)
 2790 {
 2791         struct vm_map_entry *iter;
 2792         vsize_t size;
 2793 
 2794         size = 0;
 2795         RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
 2796                 if (!UVM_ET_ISHOLE(iter))
 2797                         size += iter->end - iter->start;
 2798         }
 2799 
 2800         if (map->size != size)
 2801                 printf("map size = 0x%lx, should be 0x%lx\n", map->size, size);
 2802         UVM_ASSERT(map, map->size == size, file, line);
 2803 
 2804         vmspace_validate(map);
 2805 }
 2806 
 2807 /*
 2808  * This function validates the statistics on vmspace.
 2809  */
 2810 void
 2811 vmspace_validate(struct vm_map *map)
 2812 {
 2813         struct vmspace *vm;
 2814         struct vm_map_entry *iter;
 2815         vaddr_t imin, imax;
 2816         vaddr_t stack_begin, stack_end; /* Position of stack. */
 2817         vsize_t stack, heap; /* Measured sizes. */
 2818 
 2819         if (!(map->flags & VM_MAP_ISVMSPACE))
 2820                 return;
 2821 
 2822         vm = (struct vmspace *)map;
 2823         stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
 2824         stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
 2825 
 2826         stack = heap = 0;
 2827         RBT_FOREACH(iter, uvm_map_addr, &map->addr) {
 2828                 imin = imax = iter->start;
 2829 
 2830                 if (UVM_ET_ISHOLE(iter) || iter->object.uvm_obj != NULL ||
 2831                     iter->protection != PROT_NONE)
 2832                         continue;
 2833 
 2834                 /*
 2835                  * Update stack, heap.
 2836                  * Keep in mind that (theoretically) the entries of
 2837                  * userspace and stack may be joined.
 2838                  */
 2839                 while (imin != iter->end) {
 2840                         /*
 2841                          * Set imax to the first boundary crossed between
 2842                          * imin and stack addresses.
 2843                          */
 2844                         imax = iter->end;
 2845                         if (imin < stack_begin && imax > stack_begin)
 2846                                 imax = stack_begin;
 2847                         else if (imin < stack_end && imax > stack_end)
 2848                                 imax = stack_end;
 2849 
 2850                         if (imin >= stack_begin && imin < stack_end)
 2851                                 stack += imax - imin;
 2852                         else
 2853                                 heap += imax - imin;
 2854                         imin = imax;
 2855                 }
 2856         }
 2857 
 2858         heap >>= PAGE_SHIFT;
 2859         if (heap != vm->vm_dused) {
 2860                 printf("vmspace stack range: 0x%lx-0x%lx\n",
 2861                     stack_begin, stack_end);
 2862                 panic("vmspace_validate: vmspace.vm_dused invalid, "
 2863                     "expected %ld pgs, got %d pgs in map %p",
 2864                     heap, vm->vm_dused,
 2865                     map);
 2866         }
 2867 }
 2868 
 2869 #endif /* VMMAP_DEBUG */
 2870 
 2871 /*
 2872  * uvm_map_init: init mapping system at boot time.   note that we allocate
 2873  * and init the static pool of structs vm_map_entry for the kernel here.
 2874  */
 2875 void
 2876 uvm_map_init(void)
 2877 {
 2878         static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
 2879         int lcv;
 2880 
 2881         /* now set up static pool of kernel map entries ... */
 2882         mtx_init(&uvm_kmapent_mtx, IPL_VM);
 2883         SLIST_INIT(&uvm.kentry_free);
 2884         for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
 2885                 SLIST_INSERT_HEAD(&uvm.kentry_free,
 2886                     &kernel_map_entry[lcv], daddrs.addr_kentry);
 2887         }
 2888 
 2889         /* initialize the map-related pools. */
 2890         pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), 0,
 2891             IPL_NONE, PR_WAITOK, "vmsppl", NULL);
 2892         pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), 0,
 2893             IPL_VM, PR_WAITOK, "vmmpepl", NULL);
 2894         pool_init(&uvm_map_entry_kmem_pool, sizeof(struct vm_map_entry), 0,
 2895             IPL_VM, 0, "vmmpekpl", NULL);
 2896         pool_sethiwat(&uvm_map_entry_pool, 8192);
 2897 
 2898         uvm_addr_init();
 2899 }
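
/*
 * Editor's note: illustrative sketch only, not part of uvm_map.c.
 * It shows how the static free list initialized above is meant to be
 * consumed: pop one entry while holding uvm_kmapent_mtx.  The helper
 * name is hypothetical; the in-tree allocator that feeds off
 * uvm.kentry_free lives earlier in this file.
 */
#ifdef notyet
static struct vm_map_entry *
example_kentry_get(void)
{
	struct vm_map_entry *me;

	mtx_enter(&uvm_kmapent_mtx);
	me = SLIST_FIRST(&uvm.kentry_free);
	if (me != NULL)
		SLIST_REMOVE_HEAD(&uvm.kentry_free, daddrs.addr_kentry);
	mtx_leave(&uvm_kmapent_mtx);
	return me;
}
#endif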
 2900 
 2901 #if defined(DDB)
 2902 
 2903 /*
 2904  * DDB hooks
 2905  */
 2906 
 2907 /*
 2908  * uvm_map_printit: actually prints the map
 2909  */
 2910 void
 2911 uvm_map_printit(struct vm_map *map, boolean_t full,
 2912     int (*pr)(const char *, ...))
 2913 {
 2914         struct vmspace                  *vm;
 2915         struct vm_map_entry             *entry;
 2916         struct uvm_addr_state           *free;
 2917         int                              in_free, i;
 2918         char                             buf[8];
 2919 
 2920         (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
 2921         (*pr)("\tbrk() allocate range: 0x%lx-0x%lx\n",
 2922             map->b_start, map->b_end);
 2923         (*pr)("\tstack allocate range: 0x%lx-0x%lx\n",
 2924             map->s_start, map->s_end);
 2925         (*pr)("\tsz=%u, ref=%d, version=%u, flags=0x%x\n",
 2926             map->size, map->ref_count, map->timestamp,
 2927             map->flags);
 2928         (*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
 2929             pmap_resident_count(map->pmap));
 2930 
 2931         /* struct vmspace handling. */
 2932         if (map->flags & VM_MAP_ISVMSPACE) {
 2933                 vm = (struct vmspace *)map;
 2934 
 2935                 (*pr)("\tvm_refcnt=%d vm_shm=%p vm_rssize=%u vm_swrss=%u\n",
 2936                     vm->vm_refcnt, vm->vm_shm, vm->vm_rssize, vm->vm_swrss);
 2937                 (*pr)("\tvm_tsize=%u vm_dsize=%u\n",
 2938                     vm->vm_tsize, vm->vm_dsize);
 2939                 (*pr)("\tvm_taddr=%p vm_daddr=%p\n",
 2940                     vm->vm_taddr, vm->vm_daddr);
 2941                 (*pr)("\tvm_maxsaddr=%p vm_minsaddr=%p\n",
 2942                     vm->vm_maxsaddr, vm->vm_minsaddr);
 2943         }
 2944 
 2945         if (!full)
 2946                 goto print_uaddr;
 2947         RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
 2948                 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
 2949                     entry, entry->start, entry->end, entry->object.uvm_obj,
 2950                     (long long)entry->offset, entry->aref.ar_amap,
 2951                     entry->aref.ar_pageoff);
 2952                 (*pr)("\tsubmap=%c, cow=%c, nc=%c, stack=%c, "
 2953                     "syscall=%c, prot(max)=%d/%d, inh=%d, "
 2954                     "wc=%d, adv=%d\n",
 2955                     (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
 2956                     (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
 2957                     (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
 2958                     (entry->etype & UVM_ET_STACK) ? 'T' : 'F',
 2959                     (entry->etype & UVM_ET_SYSCALL) ? 'T' : 'F',
 2960                     entry->protection, entry->max_protection,
 2961                     entry->inheritance, entry->wired_count, entry->advice);
 2962 
 2963                 free = uvm_map_uaddr_e(map, entry);
 2964                 in_free = (free != NULL);
 2965                 (*pr)("\thole=%c, free=%c, guard=0x%lx, "
 2966                     "free=0x%lx-0x%lx\n",
 2967                     (entry->etype & UVM_ET_HOLE) ? 'T' : 'F',
 2968                     in_free ? 'T' : 'F',
 2969                     entry->guard,
 2970                     VMMAP_FREE_START(entry), VMMAP_FREE_END(entry));
 2971                 (*pr)("\tfspace_augment=%lu\n", entry->fspace_augment);
 2972                 (*pr)("\tfreemapped=%c, uaddr=%p\n",
 2973                     (entry->etype & UVM_ET_FREEMAPPED) ? 'T' : 'F', free);
 2974                 if (free) {
 2975                         (*pr)("\t\t(0x%lx-0x%lx %s)\n",
 2976                             free->uaddr_minaddr, free->uaddr_maxaddr,
 2977                             free->uaddr_functions->uaddr_name);
 2978                 }
 2979         }
 2980 
 2981 print_uaddr:
 2982         uvm_addr_print(map->uaddr_exe, "exe", full, pr);
 2983         for (i = 0; i < nitems(map->uaddr_any); i++) {
 2984                 snprintf(&buf[0], sizeof(buf), "any[%d]", i);
 2985                 uvm_addr_print(map->uaddr_any[i], &buf[0], full, pr);
 2986         }
 2987         uvm_addr_print(map->uaddr_brk_stack, "brk/stack", full, pr);
 2988 }
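
/*
 * Editor's note: illustrative sketch, not part of uvm_map.c.  The DDB
 * print routines above take a printf-like callback, so a debugger hook
 * could dump a map as shown; db_printf is assumed to be DDB's printer
 * and the helper name is hypothetical.
 */
#ifdef notyet
static void
example_ddb_dump_map(struct vm_map *map, boolean_t full)
{
	uvm_map_printit(map, full, db_printf);
}
#endif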
 2989 
 2990 /*
 2991  * uvm_object_printit: actually prints the object
 2992  */
 2993 void
 2994 uvm_object_printit(struct uvm_object *uobj, boolean_t full,
 2995     int (*pr)(const char *, ...))
 2996 {
 2997         struct vm_page *pg;
 2998         int cnt = 0;
 2999 
 3000         (*pr)("OBJECT %p: pgops=%p, npages=%d, ",
 3001             uobj, uobj->pgops, uobj->uo_npages);
 3002         if (UVM_OBJ_IS_KERN_OBJECT(uobj))
 3003                 (*pr)("refs=<SYSTEM>\n");
 3004         else
 3005                 (*pr)("refs=%d\n", uobj->uo_refs);
 3006 
 3007         if (!full) {
 3008                 return;
 3009         }
 3010         (*pr)("  PAGES <pg,offset>:\n  ");
 3011         RBT_FOREACH(pg, uvm_objtree, &uobj->memt) {
 3012                 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
 3013                 if ((cnt % 3) == 2) {
 3014                         (*pr)("\n  ");
 3015                 }
 3016                 cnt++;
 3017         }
 3018         if ((cnt % 3) != 2) {
 3019                 (*pr)("\n");
 3020         }
 3021 }
 3022 
 3023 /*
 3024  * uvm_page_printit: actually print the page
 3025  */
 3026 static const char page_flagbits[] =
 3027         "\2\1BUSY\2WANTED\3TABLED\4CLEAN\5CLEANCHK\6RELEASED\7FAKE\10RDONLY"
 3028         "\11ZERO\12DEV\15PAGER1\21FREE\22INACTIVE\23ACTIVE\25ANON\26AOBJ"
 3029         "\27ENCRYPT\31PMAP0\32PMAP1\33PMAP2\34PMAP3\35PMAP4\36PMAP5";
 3030 
 3031 void
 3032 uvm_page_printit(struct vm_page *pg, boolean_t full,
 3033     int (*pr)(const char *, ...))
 3034 {
 3035         struct vm_page *tpg;
 3036         struct uvm_object *uobj;
 3037         struct pglist *pgl;
 3038 
 3039         (*pr)("PAGE %p:\n", pg);
 3040         (*pr)("  flags=%b, vers=%d, wire_count=%d, pa=0x%llx\n",
 3041             pg->pg_flags, page_flagbits, pg->pg_version, pg->wire_count,
 3042             (long long)pg->phys_addr);
 3043         (*pr)("  uobject=%p, uanon=%p, offset=0x%llx\n",
 3044             pg->uobject, pg->uanon, (long long)pg->offset);
 3045 #if defined(UVM_PAGE_TRKOWN)
 3046         if (pg->pg_flags & PG_BUSY)
 3047                 (*pr)("  owning thread = %d, tag=%s",
 3048                     pg->owner, pg->owner_tag);
 3049         else
 3050                 (*pr)("  page not busy, no owner");
 3051 #else
 3052         (*pr)("  [page ownership tracking disabled]");
 3053 #endif
 3054         (*pr)("\tvm_page_md %p\n", &pg->mdpage);
 3055 
 3056         if (!full)
 3057                 return;
 3058 
 3059         /* cross-verify object/anon */
 3060         if ((pg->pg_flags & PQ_FREE) == 0) {
 3061                 if (pg->pg_flags & PQ_ANON) {
 3062                         if (pg->uanon == NULL || pg->uanon->an_page != pg)
 3063                             (*pr)("  >>> ANON DOES NOT POINT HERE <<< (%p)\n",
 3064                                 (pg->uanon) ? pg->uanon->an_page : NULL);
 3065                         else
 3066                                 (*pr)("  anon backpointer is OK\n");
 3067                 } else {
 3068                         uobj = pg->uobject;
 3069                         if (uobj) {
 3070                                 (*pr)("  checking object list\n");
 3071                                 RBT_FOREACH(tpg, uvm_objtree, &uobj->memt) {
 3072                                         if (tpg == pg) {
 3073                                                 break;
 3074                                         }
 3075                                 }
 3076                                 if (tpg)
 3077                                         (*pr)("  page found on object list\n");
 3078                                 else
 3079                                         (*pr)("  >>> PAGE NOT FOUND "
 3080                                             "ON OBJECT LIST! <<<\n");
 3081                         }
 3082                 }
 3083         }
 3084 
 3085         /* cross-verify page queue */
 3086         if (pg->pg_flags & PQ_FREE) {
 3087                 if (uvm_pmr_isfree(pg))
 3088                         (*pr)("  page found in uvm_pmemrange\n");
 3089                 else
 3090                         (*pr)("  >>> page not found in uvm_pmemrange <<<\n");
 3091                 pgl = NULL;
 3092         } else if (pg->pg_flags & PQ_INACTIVE) {
 3093                 pgl = &uvm.page_inactive;
 3094         } else if (pg->pg_flags & PQ_ACTIVE) {
 3095                 pgl = &uvm.page_active;
 3096         } else {
 3097                 pgl = NULL;
 3098         }
 3099 
 3100         if (pgl) {
 3101                 (*pr)("  checking pageq list\n");
 3102                 TAILQ_FOREACH(tpg, pgl, pageq) {
 3103                         if (tpg == pg) {
 3104                                 break;
 3105                         }
 3106                 }
 3107                 if (tpg)
 3108                         (*pr)("  page found on pageq list\n");
 3109                 else
 3110                         (*pr)("  >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
 3111         }
 3112 }
 3113 #endif
 3114 
 3115 /*
 3116  * uvm_map_protect: change map protection
 3117  *
 3118  * => set_max means set max_protection.
 3119  * => map must be unlocked.
 3120  */
 3121 int
 3122 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
 3123     vm_prot_t new_prot, int etype, boolean_t set_max, boolean_t checkimmutable)
 3124 {
 3125         struct vm_map_entry *first, *iter;
 3126         vm_prot_t old_prot;
 3127         vm_prot_t mask;
 3128         vsize_t dused;
 3129         int error;
 3130 
 3131         KASSERT((etype & ~UVM_ET_STACK) == 0);  /* only UVM_ET_STACK allowed */
 3132 
 3133         if (start > end)
 3134                 return EINVAL;
 3135         start = MAX(start, map->min_offset);
 3136         end = MIN(end, map->max_offset);
 3137         if (start >= end)
 3138                 return 0;
 3139 
 3140         dused = 0;
 3141         error = 0;
 3142         vm_map_lock(map);
 3143 
 3144         /*
 3145          * Set up first and last.
 3146          * - first will contain first entry at or after start.
 3147          */
 3148         first = uvm_map_entrybyaddr(&map->addr, start);
 3149         KDASSERT(first != NULL);
 3150         if (first->end <= start)
 3151                 first = RBT_NEXT(uvm_map_addr, first);
 3152 
 3153         /* First, check for protection violations. */
 3154         for (iter = first; iter != NULL && iter->start < end;
 3155             iter = RBT_NEXT(uvm_map_addr, iter)) {
 3156                 /* Treat memory holes as free space. */
 3157                 if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
 3158                         continue;
 3159 
 3160                 if (checkimmutable &&
 3161                     (iter->etype & UVM_ET_IMMUTABLE)) {
 3162                         if (iter->protection == (PROT_READ | PROT_WRITE) &&
 3163                             new_prot == PROT_READ) {
 3164                                 /* Permit RW to R as a data-locking mechanism */
 3165                                 ;
 3166                         } else {
 3167                                 error = EPERM;
 3168                                 goto out;
 3169                         }
 3170                 }
 3171                 old_prot = iter->protection;
 3172                 if (old_prot == PROT_NONE && new_prot != old_prot) {
 3173                         dused += uvmspace_dused(
 3174                             map, MAX(start, iter->start), MIN(end, iter->end));
 3175                 }
 3176 
 3177                 if (UVM_ET_ISSUBMAP(iter)) {
 3178                         error = EINVAL;
 3179                         goto out;
 3180                 }
 3181                 if ((new_prot & iter->max_protection) != new_prot) {
 3182                         error = EACCES;
 3183                         goto out;
 3184                 }
 3185                 if (map == kernel_map &&
 3186                     (new_prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
 3187                         panic("uvm_map_protect: kernel map W^X violation requested");
 3188         }
 3189 
 3190         /* Check limits. */
 3191         if (dused > 0 && (map->flags & VM_MAP_ISVMSPACE)) {
 3192                 vsize_t limit = lim_cur(RLIMIT_DATA);
 3193                 dused = ptoa(dused);
 3194                 if (limit < dused ||
 3195                     limit - dused < ptoa(((struct vmspace *)map)->vm_dused)) {
 3196                         error = ENOMEM;
 3197                         goto out;
 3198                 }
 3199         }
 3200 
 3201         /* only apply UVM_ET_STACK on a mapping changing to RW */
 3202         if (etype && new_prot != (PROT_READ|PROT_WRITE))
 3203                 etype = 0;
 3204 
 3205         /* Fix protections.  */
 3206         for (iter = first; iter != NULL && iter->start < end;
 3207             iter = RBT_NEXT(uvm_map_addr, iter)) {
 3208                 /* Treat memory holes as free space. */
 3209                 if (iter->start == iter->end || UVM_ET_ISHOLE(iter))
 3210                         continue;
 3211 
 3212                 old_prot = iter->protection;
 3213 
 3214                 /*
 3215                  * Skip adapting protection iff old and new protection
 3216                  * are equal.
 3217                  */
 3218                 if (set_max) {
 3219                         if (old_prot == (new_prot & old_prot) &&
 3220                             iter->max_protection == new_prot)
 3221                                 continue;
 3222                 } else {
 3223                         if (old_prot == new_prot)
 3224                                 continue;
 3225                 }
 3226 
 3227                 UVM_MAP_CLIP_START(map, iter, start);
 3228                 UVM_MAP_CLIP_END(map, iter, end);
 3229 
 3230                 if (set_max) {
 3231                         iter->max_protection = new_prot;
 3232                         iter->protection &= new_prot;
 3233                 } else
 3234                         iter->protection = new_prot;
 3235                 iter->etype |= etype;   /* potentially add UVM_ET_STACK */
 3236 
 3237                 /*
 3238                  * update physical map if necessary.  worry about copy-on-write
 3239                  * here -- CHECK THIS XXX
 3240                  */
 3241                 if (iter->protection != old_prot) {
 3242                         mask = UVM_ET_ISCOPYONWRITE(iter) ?
 3243                             ~PROT_WRITE : PROT_MASK;
 3244 
 3245                         /* XXX should only wserial++ if no split occurs */
 3246                         if (iter->protection & PROT_WRITE)
 3247                                 map->wserial++;
 3248 
 3249                         if (map->flags & VM_MAP_ISVMSPACE) {
 3250                                 if (old_prot == PROT_NONE) {
 3251                                         ((struct vmspace *)map)->vm_dused +=
 3252                                             uvmspace_dused(map, iter->start,
 3253                                                 iter->end);
 3254                                 }
 3255                                 if (iter->protection == PROT_NONE) {
 3256                                         ((struct vmspace *)map)->vm_dused -=
 3257                                             uvmspace_dused(map, iter->start,
 3258                                                 iter->end);
 3259                                 }
 3260                         }
 3261 
 3262                         /* update pmap */
 3263                         if ((iter->protection & mask) == PROT_NONE &&
 3264                             VM_MAPENT_ISWIRED(iter)) {
 3265                                 /*
 3266                                  * TODO(ariane) this is stupid. wired_count
 3267                                  * is 0 if not wired, otherwise anything
 3268                                  * larger than 0 (incremented once each time
 3269                                  * wire is called).
 3270                                  * Mostly to be able to undo the damage on
 3271                                  * failure. Not the actually be a wired
 3272                                  * failure. Not to actually be a wired
 3273                                  * Originally: iter->wired_count--;
 3274                                  * (don't we have to unwire this in the pmap
 3275                                  * as well?)
 3276                                  */
 3277                                 iter->wired_count = 0;
 3278                         }
 3279                         uvm_map_lock_entry(iter);
 3280                         pmap_protect(map->pmap, iter->start, iter->end,
 3281                             iter->protection & mask);
 3282                         uvm_map_unlock_entry(iter);
 3283                 }
 3284 
 3285                 /*
 3286                  * If the map is configured to lock any future mappings,
 3287                  * wire this entry now if the old protection was PROT_NONE
 3288                  * and the new protection is not PROT_NONE.
 3289                  */
 3290                 if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
 3291                     VM_MAPENT_ISWIRED(iter) == 0 &&
 3292                     old_prot == PROT_NONE &&
 3293                     new_prot != PROT_NONE) {
 3294                         if (uvm_map_pageable(map, iter->start, iter->end,
 3295                             FALSE, UVM_LK_ENTER | UVM_LK_EXIT) != 0) {
 3296                                 /*
 3297                                  * If locking the entry fails, remember the
 3298                                  * error if it's the first one.  Note we
 3299                                  * still continue setting the protection in
 3300                                  * the map, but it will return the resource
 3301                                  * shortage condition regardless.
 3302                                  *
 3303                                  * XXX Ignore what the actual error is,
 3304                                  * XXX just call it a resource shortage
 3305                                  * XXX so that it doesn't get confused
 3306                                  * XXX what uvm_map_protect() itself would
 3307                                  * XXX normally return.
 3308                                  */
 3309                                 error = ENOMEM;
 3310                         }
 3311                 }
 3312         }
 3313         pmap_update(map->pmap);
 3314 
 3315 out:
 3316         if (etype & UVM_ET_STACK)
 3317                 map->sserial++;
 3318         vm_map_unlock(map);
 3319         return error;
 3320 }
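
/*
 * Editor's note: illustrative sketch, not part of uvm_map.c.  It shows
 * how an mprotect(2)-style caller might invoke uvm_map_protect() on the
 * current vmspace: the map is passed unlocked, set_max is FALSE so only
 * the active protection changes, and checkimmutable is TRUE so
 * immutable entries are refused.  The helper name is hypothetical and
 * greatly simplified compared to the real syscall path.
 */
#ifdef notyet
static int
example_mprotect(struct proc *p, vaddr_t va, vsize_t len, vm_prot_t prot)
{
	struct vm_map *map = &p->p_vmspace->vm_map;

	return uvm_map_protect(map, trunc_page(va), round_page(va + len),
	    prot, 0, FALSE, TRUE);
}
#endif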
 3321 
 3322 /*
 3323  * uvmspace_alloc: allocate a vmspace structure.
 3324  *
 3325  * - structure includes vm_map and pmap
 3326  * - XXX: no locking on this structure
 3327  * - refcnt set to 1, rest must be init'd by caller
 3328  */
 3329 struct vmspace *
 3330 uvmspace_alloc(vaddr_t min, vaddr_t max, boolean_t pageable,
 3331     boolean_t remove_holes)
 3332 {
 3333         struct vmspace *vm;
 3334 
 3335         vm = pool_get(&uvm_vmspace_pool, PR_WAITOK | PR_ZERO);
 3336         uvmspace_init(vm, NULL, min, max, pageable, remove_holes);
 3337         return (vm);
 3338 }
 3339 
 3340 /*
 3341  * uvmspace_init: initialize a vmspace structure.
 3342  *
 3343  * - XXX: no locking on this structure
 3344  * - refcnt set to 1, rest must be init'd by caller
 3345  */
 3346 void
 3347 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max,
 3348     boolean_t pageable, boolean_t remove_holes)
 3349 {
 3350         KASSERT(pmap == NULL || pmap == pmap_kernel());
 3351 
 3352         if (pmap)
 3353                 pmap_reference(pmap);
 3354         else
 3355                 pmap = pmap_create();
 3356 
 3357         uvm_map_setup(&vm->vm_map, pmap, min, max,
 3358             (pageable ? VM_MAP_PAGEABLE : 0) | VM_MAP_ISVMSPACE);
 3359 
 3360         vm->vm_refcnt = 1;
 3361 
 3362         if (remove_holes)
 3363                 pmap_remove_holes(vm);
 3364 }
 3365 
 3366 /*
 3367  * uvmspace_share: share a vmspace between two processes
 3368  *
 3369  * - used for vfork
 3370  */
 3371 
 3372 struct vmspace *
 3373 uvmspace_share(struct process *pr)
 3374 {
 3375         struct vmspace *vm = pr->ps_vmspace;
 3376 
 3377         uvmspace_addref(vm);
 3378         return vm;
 3379 }
 3380 
 3381 /*
 3382  * uvmspace_exec: the process wants to exec a new program
 3383  *
 3384  * - XXX: no locking on vmspace
 3385  */
 3386 
 3387 void
 3388 uvmspace_exec(struct proc *p, vaddr_t start, vaddr_t end)
 3389 {
 3390         struct process *pr = p->p_p;
 3391         struct vmspace *nvm, *ovm = pr->ps_vmspace;
 3392         struct vm_map *map = &ovm->vm_map;
 3393         struct uvm_map_deadq dead_entries;
 3394 
 3395         KASSERT((start & (vaddr_t)PAGE_MASK) == 0);
 3396         KASSERT((end & (vaddr_t)PAGE_MASK) == 0 ||
 3397             (end & (vaddr_t)PAGE_MASK) == (vaddr_t)PAGE_MASK);
 3398 
 3399         pmap_unuse_final(p);   /* before stack addresses go away */
 3400         TAILQ_INIT(&dead_entries);
 3401 
 3402         /* see if more than one process is using this vmspace...  */
 3403         if (ovm->vm_refcnt == 1) {
 3404                 /*
 3405                  * If pr is the only process using its vmspace then
 3406                  * we can safely recycle that vmspace for the program
 3407                  * that is being exec'd.
 3408                  */
 3409 
 3410 #ifdef SYSVSHM
 3411                 /*
 3412                  * SYSV SHM semantics require us to kill all segments on an exec
 3413                  */
 3414                 if (ovm->vm_shm)
 3415                         shmexit(ovm);
 3416 #endif
 3417 
 3418                 /*
 3419                  * POSIX 1003.1b -- "lock future mappings" is revoked
 3420                  * when a process execs another program image.
 3421                  */
 3422                 vm_map_lock(map);
 3423                 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE|VM_MAP_SYSCALL_ONCE);
 3424 
 3425                 /*
 3426                  * now unmap the old program
 3427                  *
 3428                  * Instead of attempting to keep the map valid, we simply
 3429                  * nuke all entries and ask uvm_map_setup to reinitialize
 3430                  * the map to the new boundaries.
 3431                  *
 3432                  * uvm_unmap_remove will actually nuke all entries for us
 3433                  * (as in, not replace them with free-memory entries).
 3434                  */
 3435                 uvm_unmap_remove(map, map->min_offset, map->max_offset,
 3436                     &dead_entries, TRUE, FALSE, FALSE);
 3437 
 3438                 KDASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
 3439 
 3440                 /* Nuke statistics and boundaries. */
 3441                 memset(&ovm->vm_startcopy, 0,
 3442                     (caddr_t) (ovm + 1) - (caddr_t) &ovm->vm_startcopy);
 3443 
 3444 
 3445                 if (end & (vaddr_t)PAGE_MASK) {
 3446                         end += 1;
 3447                         if (end == 0) /* overflow */
 3448                                 end -= PAGE_SIZE;
 3449                 }
 3450 
 3451                 /* Setup new boundaries and populate map with entries. */
 3452                 map->min_offset = start;
 3453                 map->max_offset = end;
 3454                 uvm_map_setup_entries(map);
 3455                 vm_map_unlock(map);
 3456 
 3457                 /* but keep MMU holes unavailable */
 3458                 pmap_remove_holes(ovm);
 3459         } else {
 3460                 /*
 3461                  * pr's vmspace is being shared, so we can't reuse
 3462                  * it for pr since it is still being used for others.
 3463                  * allocate a new vmspace for pr
 3464                  */
 3465                 nvm = uvmspace_alloc(start, end,
 3466                     (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, TRUE);
 3467 
 3468                 /* install new vmspace and drop our ref to the old one. */
 3469                 pmap_deactivate(p);
 3470                 p->p_vmspace = pr->ps_vmspace = nvm;
 3471                 pmap_activate(p);
 3472 
 3473                 uvmspace_free(ovm);
 3474         }
 3475 #ifdef PMAP_CHECK_COPYIN
 3476         p->p_vmspace->vm_map.check_copyin_count = 0;    /* disable checks */
 3477 #endif
 3478 
 3479         /* Release dead entries */
 3480         uvm_unmap_detach(&dead_entries, 0);
 3481 }
 3482 
 3483 /*
 3484  * uvmspace_addref: add a reference to a vmspace.
 3485  */
 3486 void
 3487 uvmspace_addref(struct vmspace *vm)
 3488 {
 3489         KERNEL_ASSERT_LOCKED();
 3490         KASSERT(vm->vm_refcnt > 0);
 3491 
 3492         vm->vm_refcnt++;
 3493 }
 3494 
 3495 /*
 3496  * uvmspace_free: free a vmspace data structure
 3497  */
 3498 void
 3499 uvmspace_free(struct vmspace *vm)
 3500 {
 3501         KERNEL_ASSERT_LOCKED();
 3502 
 3503         if (--vm->vm_refcnt == 0) {
 3504                 /*
 3505                  * lock the map, to wait out all other references to it.  delete
 3506                  * all of the mappings and pages they hold, then call the pmap
 3507                  * module to reclaim anything left.
 3508                  */
 3509 #ifdef SYSVSHM
 3510                 /* Get rid of any SYSV shared memory segments. */
 3511                 if (vm->vm_shm != NULL)
 3512                         shmexit(vm);
 3513 #endif
 3514 
 3515                 uvm_map_teardown(&vm->vm_map);
 3516                 pool_put(&uvm_vmspace_pool, vm);
 3517         }
 3518 }
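
/*
 * Editor's note: illustrative sketch, not part of uvm_map.c.  A caller
 * that wants to keep a vmspace alive pairs uvmspace_addref() with
 * uvmspace_free(); both expect the kernel lock, as the assertions above
 * show.  The struct and helper names are hypothetical.
 */
#ifdef notyet
struct example_holder {
	struct vmspace *eh_vm;
};

static void
example_hold(struct example_holder *eh, struct vmspace *vm)
{
	KERNEL_LOCK();
	uvmspace_addref(vm);
	KERNEL_UNLOCK();
	eh->eh_vm = vm;
}

static void
example_release(struct example_holder *eh)
{
	KERNEL_LOCK();
	uvmspace_free(eh->eh_vm);
	KERNEL_UNLOCK();
	eh->eh_vm = NULL;
}
#endif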
 3519 
 3520 /*
 3521  * uvm_share: Map the address range [srcaddr, srcaddr + sz) in
 3522  * srcmap to the address range [dstaddr, dstaddr + sz) in
 3523  * dstmap.
 3524  *
 3525  * The whole address range in srcmap must be backed by an object
 3526  * (no holes).
 3527  *
 3528  * If successful, the address ranges share memory and the destination
 3529  * address range uses the protection flags in prot.
 3530  *
 3531  * This routine assumes that sz is a multiple of PAGE_SIZE and
 3532  * that dstaddr and srcaddr are page-aligned.
 3533  */
 3534 int
 3535 uvm_share(struct vm_map *dstmap, vaddr_t dstaddr, vm_prot_t prot,
 3536     struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz)
 3537 {
 3538         int ret = 0;
 3539         vaddr_t unmap_end;
 3540         vaddr_t dstva;
 3541         vsize_t s_off, len, n = sz, remain;
 3542         struct vm_map_entry *first = NULL, *last = NULL;
 3543         struct vm_map_entry *src_entry, *psrc_entry = NULL;
 3544         struct uvm_map_deadq dead;
 3545 
 3546         if (srcaddr >= srcmap->max_offset || sz > srcmap->max_offset - srcaddr)
 3547                 return EINVAL;
 3548 
 3549         TAILQ_INIT(&dead);
 3550         vm_map_lock(dstmap);
 3551         vm_map_lock_read(srcmap);
 3552 
 3553         if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, sz)) {
 3554                 ret = ENOMEM;
 3555                 goto exit_unlock;
 3556         }
 3557         if (!uvm_map_lookup_entry(srcmap, srcaddr, &src_entry)) {
 3558                 ret = EINVAL;
 3559                 goto exit_unlock;
 3560         }
 3561 
 3562         dstva = dstaddr;
 3563         unmap_end = dstaddr;
 3564         for (; src_entry != NULL;
 3565             psrc_entry = src_entry,
 3566             src_entry = RBT_NEXT(uvm_map_addr, src_entry)) {
 3567                 /* hole in address space, bail out */
 3568                 if (psrc_entry != NULL && psrc_entry->end != src_entry->start)
 3569                         break;
 3570                 if (src_entry->start >= srcaddr + sz)
 3571                         break;
 3572 
 3573                 if (UVM_ET_ISSUBMAP(src_entry))
 3574                         panic("uvm_share: encountered a submap (illegal)");
 3575                 if (!UVM_ET_ISCOPYONWRITE(src_entry) &&
 3576                     UVM_ET_ISNEEDSCOPY(src_entry))
 3577                         panic("uvm_share: non-copy_on_write map entries "
 3578                             "marked needs_copy (illegal)");
 3579 
 3580                 /*
 3581                  * srcaddr > map entry start? means we are in the middle of a
 3582                  * map entry, so we calculate the offset to use in the source map.
 3583                  */
 3584                 if (srcaddr > src_entry->start)
 3585                         s_off = srcaddr - src_entry->start;
 3586                 else if (srcaddr == src_entry->start)
 3587                         s_off = 0;
 3588                 else
 3589                         panic("uvm_share: map entry start > srcaddr");
 3590 
 3591                 remain = src_entry->end - src_entry->start - s_off;
 3592 
 3593                 /* Determine how many bytes to share in this pass */
 3594                 if (n < remain)
 3595                         len = n;
 3596                 else
 3597                         len = remain;
 3598 
 3599                 if (uvm_mapent_share(dstmap, dstva, len, s_off, prot, prot,
 3600                     srcmap, src_entry, &dead) == NULL)
 3601                         break;
 3602 
 3603                 n -= len;
 3604                 dstva += len;
 3605                 srcaddr += len;
 3606                 unmap_end = dstva + len;
 3607                 if (n == 0)
 3608                         goto exit_unlock;
 3609         }
 3610 
 3611         ret = EINVAL;
 3612         uvm_unmap_remove(dstmap, dstaddr, unmap_end, &dead, FALSE, TRUE, FALSE);
 3613 
 3614 exit_unlock:
 3615         vm_map_unlock_read(srcmap);
 3616         vm_map_unlock(dstmap);
 3617         uvm_unmap_detach(&dead, 0);
 3618 
 3619         return ret;
 3620 }
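
/*
 * Editor's note: illustrative sketch, not part of uvm_map.c.  It shows
 * the calling convention documented above uvm_share(): both addresses
 * page-aligned, sz a multiple of PAGE_SIZE, and the source range fully
 * backed by objects.  The helper name is hypothetical.
 */
#ifdef notyet
static int
example_share_range(struct vm_map *dstmap, vaddr_t dstaddr,
    struct vm_map *srcmap, vaddr_t srcaddr, vsize_t sz)
{
	KASSERT((dstaddr & (vaddr_t)PAGE_MASK) == 0);
	KASSERT((srcaddr & (vaddr_t)PAGE_MASK) == 0);
	KASSERT((sz & (vsize_t)PAGE_MASK) == 0);

	/* The destination becomes a read/write view of the source range. */
	return uvm_share(dstmap, dstaddr, PROT_READ | PROT_WRITE,
	    srcmap, srcaddr, sz);
}
#endif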
 3621 
 3622 /*
 3623  * Clone map entry into other map.
 3624  *
 3625  * Mapping will be placed at dstaddr, for the same length.
 3626  * Space must be available.
 3627  * Reference counters are incremented.
 3628  */
 3629 struct vm_map_entry *
 3630 uvm_mapent_clone(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
 3631     vsize_t off, vm_prot_t prot, vm_prot_t maxprot,
 3632     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead,
 3633     int mapent_flags, int amap_share_flags)
 3634 {
 3635         struct vm_map_entry *new_entry, *first, *last;
 3636 
 3637         KDASSERT(!UVM_ET_ISSUBMAP(old_entry));
 3638 
 3639         /* Create new entry (linked in on creation). Fill in first, last. */
 3640         first = last = NULL;
 3641         if (!uvm_map_isavail(dstmap, NULL, &first, &last, dstaddr, dstlen)) {
 3642                 panic("uvm_mapent_clone: no space in map for "
 3643                     "entry in empty map");
 3644         }
 3645         new_entry = uvm_map_mkentry(dstmap, first, last,
 3646             dstaddr, dstlen, mapent_flags, dead, NULL);
 3647         if (new_entry == NULL)
 3648                 return NULL;
 3649         /* old_entry -> new_entry */
 3650         new_entry->object = old_entry->object;
 3651         new_entry->offset = old_entry->offset;
 3652         new_entry->aref = old_entry->aref;
 3653         new_entry->etype |= old_entry->etype & ~UVM_ET_FREEMAPPED;
 3654         new_entry->protection = prot;
 3655         new_entry->max_protection = maxprot;
 3656         new_entry->inheritance = old_entry->inheritance;
 3657         new_entry->advice = old_entry->advice;
 3658 
 3659         /* gain reference to object backing the map (can't be a submap). */
 3660         if (new_entry->aref.ar_amap) {
 3661                 new_entry->aref.ar_pageoff += off >> PAGE_SHIFT;
 3662                 amap_ref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
 3663                     (new_entry->end - new_entry->start) >> PAGE_SHIFT,
 3664                     amap_share_flags);
 3665         }
 3666 
 3667         if (UVM_ET_ISOBJ(new_entry) &&
 3668             new_entry->object.uvm_obj->pgops->pgo_reference) {
 3669                 new_entry->offset += off;
 3670                 new_entry->object.uvm_obj->pgops->pgo_reference
 3671                     (new_entry->object.uvm_obj);
 3672         }
 3673 
 3674         return new_entry;
 3675 }
 3676 
 3677 struct vm_map_entry *
 3678 uvm_mapent_share(struct vm_map *dstmap, vaddr_t dstaddr, vsize_t dstlen,
 3679     vsize_t off, vm_prot_t prot, vm_prot_t maxprot, struct vm_map *old_map,
 3680     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
 3681 {
 3682         /*
 3683          * If old_entry refers to a copy-on-write region that has not yet been
 3684          * written to (needs_copy flag is set), then we need to allocate a new
 3685          * amap for old_entry.
 3686          *
 3687          * If we do not do this, and the process owning old_entry does a copy-on
 3688          * write later, old_entry and new_entry will refer to different memory
 3689          * regions, and the memory between the processes is no longer shared.
 3690          *
 3691          * [in other words, we need to clear needs_copy]
 3692          */
 3693 
 3694         if (UVM_ET_ISNEEDSCOPY(old_entry)) {
 3695                 /* get our own amap, clears needs_copy */
 3696                 amap_copy(old_map, old_entry, M_WAITOK, FALSE, 0, 0);
 3697                 /* XXXCDC: WAITOK??? */
 3698         }
 3699 
 3700         return uvm_mapent_clone(dstmap, dstaddr, dstlen, off,
 3701             prot, maxprot, old_entry, dead, 0, AMAP_SHARED);
 3702 }
 3703 
 3704 /*
 3705  * share the mapping: this means we want the old and
 3706  * new entries to share amaps and backing objects.
 3707  */
 3708 struct vm_map_entry *
 3709 uvm_mapent_forkshared(struct vmspace *new_vm, struct vm_map *new_map,
 3710     struct vm_map *old_map,
 3711     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
 3712 {
 3713         struct vm_map_entry *new_entry;
 3714 
 3715         new_entry = uvm_mapent_share(new_map, old_entry->start,
 3716             old_entry->end - old_entry->start, 0, old_entry->protection,
 3717             old_entry->max_protection, old_map, old_entry, dead);
 3718 
 3719         /*
 3720          * pmap_copy the mappings: this routine is optional
 3721          * but if it is there it will reduce the number of
 3722          * page faults in the new proc.
 3723          */
 3724         if (!UVM_ET_ISHOLE(new_entry))
 3725                 pmap_copy(new_map->pmap, old_map->pmap, new_entry->start,
 3726                     (new_entry->end - new_entry->start), new_entry->start);
 3727 
 3728         return (new_entry);
 3729 }
 3730 
 3731 /*
 3732  * copy-on-write the mapping (using mmap's
 3733  * MAP_PRIVATE semantics)
 3734  *
 3735  * allocate new_entry, adjust reference counts.
 3736  * (note that new references are read-only).
 3737  */
 3738 struct vm_map_entry *
 3739 uvm_mapent_forkcopy(struct vmspace *new_vm, struct vm_map *new_map,
 3740     struct vm_map *old_map,
 3741     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
 3742 {
 3743         struct vm_map_entry     *new_entry;
 3744         boolean_t                protect_child;
 3745 
 3746         new_entry = uvm_mapent_clone(new_map, old_entry->start,
 3747             old_entry->end - old_entry->start, 0, old_entry->protection,
 3748             old_entry->max_protection, old_entry, dead, 0, 0);
 3749 
 3750         new_entry->etype |=
 3751             (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
 3752 
 3753         /*
 3754          * the new entry will need an amap.  it will either
 3755          * need to be copied from the old entry or created
 3756          * from scratch (if the old entry does not have an
 3757          * amap).  can we defer this process until later
 3758          * (by setting "needs_copy") or do we need to copy
 3759          * the amap now?
 3760          *
 3761          * we must copy the amap now if any of the following
 3762          * conditions hold:
 3763          * 1. the old entry has an amap and that amap is
 3764          *    being shared.  this means that the old (parent)
 3765          *    process is sharing the amap with another
 3766          *    process.  if we do not clear needs_copy here
 3767          *    we will end up in a situation where both the
 3768          *    parent and child process are referring to the
 3769          *    same amap with "needs_copy" set.  if the
 3770          *    parent write-faults, the fault routine will
 3771          *    clear "needs_copy" in the parent by allocating
 3772          *    a new amap.   this is wrong because the
 3773          *    parent is supposed to be sharing the old amap
 3774          *    and the new amap will break that.
 3775          *
 3776          * 2. if the old entry has an amap and a non-zero
 3777          *    wire count then we are going to have to call
 3778          *    amap_cow_now to avoid page faults in the
 3779          *    parent process.   since amap_cow_now requires
 3780          *    "needs_copy" to be clear we might as well
 3781          *    clear it here as well.
 3782          *
 3783          */
 3784         if (old_entry->aref.ar_amap != NULL &&
 3785             ((amap_flags(old_entry->aref.ar_amap) &
 3786             AMAP_SHARED) != 0 ||
 3787             VM_MAPENT_ISWIRED(old_entry))) {
 3788                 amap_copy(new_map, new_entry, M_WAITOK, FALSE,
 3789                     0, 0);
 3790                 /* XXXCDC: M_WAITOK ... ok? */
 3791         }
 3792 
 3793         /*
 3794          * if the parent's entry is wired down, then the
 3795          * parent process does not want page faults on
 3796          * access to that memory.  this means that we
 3797          * cannot do copy-on-write because we can't write
 3798          * protect the old entry.   in this case we
 3799          * resolve all copy-on-write faults now, using
 3800          * amap_cow_now.   note that we have already
 3801          * allocated any needed amap (above).
 3802          */
 3803         if (VM_MAPENT_ISWIRED(old_entry)) {
 3804                 /*
 3805                  * resolve all copy-on-write faults now
 3806                  * (note that there is nothing to do if
 3807                  * the old mapping does not have an amap).
 3808                  * XXX: is it worthwhile to bother with
 3809                  * pmap_copy in this case?
 3810                  */
 3811                 if (old_entry->aref.ar_amap)
 3812                         amap_cow_now(new_map, new_entry);
 3813         } else {
 3814                 if (old_entry->aref.ar_amap) {
 3815                         /*
 3816                          * setup mappings to trigger copy-on-write faults
 3817                          * we must write-protect the parent if it has
 3818                          * an amap and it is not already "needs_copy"...
 3819                          * if it is already "needs_copy" then the parent
 3820                          * has already been write-protected by a previous
 3821                          * fork operation.
 3822                          *
 3823                          * if we do not write-protect the parent, then
 3824                          * we must be sure to write-protect the child
 3825                          * after the pmap_copy() operation.
 3826                          *
 3827                          * XXX: pmap_copy should have some way of telling
 3828                          * us that it didn't do anything so we can avoid
 3829                          * calling pmap_protect needlessly.
 3830                          */
 3831                         if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
 3832                                 if (old_entry->max_protection & PROT_WRITE) {
 3833                                         uvm_map_lock_entry(old_entry);
 3834                                         pmap_protect(old_map->pmap,
 3835                                             old_entry->start,
 3836                                             old_entry->end,
 3837                                             old_entry->protection &
 3838                                             ~PROT_WRITE);
 3839                                         uvm_map_unlock_entry(old_entry);
 3840                                         pmap_update(old_map->pmap);
 3841                                 }
 3842                                 old_entry->etype |= UVM_ET_NEEDSCOPY;
 3843                         }
 3844 
 3845                         /* parent must now be write-protected */
 3846                         protect_child = FALSE;
 3847                 } else {
 3848                         /*
 3849                          * we only need to protect the child if the
 3850                          * parent has write access.
 3851                          */
 3852                         if (old_entry->max_protection & PROT_WRITE)
 3853                                 protect_child = TRUE;
 3854                         else
 3855                                 protect_child = FALSE;
 3856                 }
 3857                 /*
 3858                  * copy the mappings
 3859                  * XXX: need a way to tell if this does anything
 3860                  */
 3861                 if (!UVM_ET_ISHOLE(new_entry))
 3862                         pmap_copy(new_map->pmap, old_map->pmap,
 3863                             new_entry->start,
 3864                             (old_entry->end - old_entry->start),
 3865                             old_entry->start);
 3866 
 3867                 /* protect the child's mappings if necessary */
 3868                 if (protect_child) {
 3869                         pmap_protect(new_map->pmap, new_entry->start,
 3870                             new_entry->end,
 3871                             new_entry->protection &
 3872                             ~PROT_WRITE);
 3873                 }
 3874         }
 3875 
 3876         return (new_entry);
 3877 }
 3878 
 3879 /*
 3880  * zero the mapping: the new entry will be zero initialized
 3881  */
 3882 struct vm_map_entry *
 3883 uvm_mapent_forkzero(struct vmspace *new_vm, struct vm_map *new_map,
 3884     struct vm_map *old_map,
 3885     struct vm_map_entry *old_entry, struct uvm_map_deadq *dead)
 3886 {
 3887         struct vm_map_entry *new_entry;
 3888 
 3889         new_entry = uvm_mapent_clone(new_map, old_entry->start,
 3890             old_entry->end - old_entry->start, 0, old_entry->protection,
 3891             old_entry->max_protection, old_entry, dead, 0, 0);
 3892 
 3893         new_entry->etype |=
 3894             (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
 3895 
 3896         if (new_entry->aref.ar_amap) {
 3897                 amap_unref(new_entry->aref.ar_amap, new_entry->aref.ar_pageoff,
 3898                     atop(new_entry->end - new_entry->start), 0);
 3899                 new_entry->aref.ar_amap = NULL;
 3900                 new_entry->aref.ar_pageoff = 0;
 3901         }
 3902 
 3903         if (UVM_ET_ISOBJ(new_entry)) {
 3904                 if (new_entry->object.uvm_obj->pgops->pgo_detach)
 3905                         new_entry->object.uvm_obj->pgops->pgo_detach(
 3906                             new_entry->object.uvm_obj);
 3907                 new_entry->object.uvm_obj = NULL;
 3908                 new_entry->etype &= ~UVM_ET_OBJ;
 3909         }
 3910 
 3911         return (new_entry);
 3912 }
 3913 
 3914 /*
 3915  * uvmspace_fork: fork a process' main map
 3916  *
 3917  * => create a new vmspace for child process from parent.
 3918  * => parent's map must not be locked.
 3919  */
 3920 struct vmspace *
 3921 uvmspace_fork(struct process *pr)
 3922 {
 3923         struct vmspace *vm1 = pr->ps_vmspace;
 3924         struct vmspace *vm2;
 3925         struct vm_map *old_map = &vm1->vm_map;
 3926         struct vm_map *new_map;
 3927         struct vm_map_entry *old_entry, *new_entry;
 3928         struct uvm_map_deadq dead;
 3929 
 3930         vm_map_lock(old_map);
 3931 
 3932         vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
 3933             (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE, FALSE);
 3934         memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
 3935             (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
 3936         vm2->vm_dused = 0; /* Statistic managed by us. */
 3937         new_map = &vm2->vm_map;
 3938         vm_map_lock(new_map);
 3939 
 3940         /* go entry-by-entry */
 3941         TAILQ_INIT(&dead);
 3942         RBT_FOREACH(old_entry, uvm_map_addr, &old_map->addr) {
 3943                 if (old_entry->start == old_entry->end)
 3944                         continue;
 3945 
 3946                 /* first, some sanity checks on the old entry */
 3947                 if (UVM_ET_ISSUBMAP(old_entry)) {
 3948                         panic("fork: encountered a submap during fork "
 3949                             "(illegal)");
 3950                 }
 3951 
 3952                 if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
 3953                     UVM_ET_ISNEEDSCOPY(old_entry)) {
 3954                         panic("fork: non-copy_on_write map entry marked "
 3955                             "needs_copy (illegal)");
 3956                 }
 3957 
 3958                 /* Apply inheritance. */
 3959                 switch (old_entry->inheritance) {
 3960                 case MAP_INHERIT_SHARE:
 3961                         new_entry = uvm_mapent_forkshared(vm2, new_map,
 3962                             old_map, old_entry, &dead);
 3963                         break;
 3964                 case MAP_INHERIT_COPY:
 3965                         new_entry = uvm_mapent_forkcopy(vm2, new_map,
 3966                             old_map, old_entry, &dead);
 3967                         break;
 3968                 case MAP_INHERIT_ZERO:
 3969                         new_entry = uvm_mapent_forkzero(vm2, new_map,
 3970                             old_map, old_entry, &dead);
 3971                         break;
 3972                 default:
 3973                         continue;
 3974                 }
 3975 
 3976                 /* Update process statistics. */
 3977                 if (!UVM_ET_ISHOLE(new_entry))
 3978                         new_map->size += new_entry->end - new_entry->start;
 3979                 if (!UVM_ET_ISOBJ(new_entry) && !UVM_ET_ISHOLE(new_entry) &&
 3980                     new_entry->protection != PROT_NONE) {
 3981                         vm2->vm_dused += uvmspace_dused(
 3982                             new_map, new_entry->start, new_entry->end);
 3983                 }
 3984         }
 3985 
 3986         vm_map_unlock(old_map);
 3987         vm_map_unlock(new_map);
 3988 
 3989         /*
 3990          * This can actually happen, if multiple entries described a
 3991          * space in which an entry was inherited.
 3992          */
 3993         uvm_unmap_detach(&dead, 0);
 3994 
 3995 #ifdef SYSVSHM
 3996         if (vm1->vm_shm)
 3997                 shmfork(vm1, vm2);
 3998 #endif
 3999 
 4000         return vm2;
 4001 }
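
/*
 * Editor's note: illustrative sketch, not part of uvm_map.c.  During
 * fork the caller either shares the parent's vmspace (the vfork case,
 * via uvmspace_share()) or copies it entry by entry with
 * uvmspace_fork(), driven by the inheritance switch above.  Names are
 * hypothetical and the real fork path does considerably more work.
 */
#ifdef notyet
static void
example_fork_vm(struct process *parent, struct process *child, int shared)
{
	if (shared)
		child->ps_vmspace = uvmspace_share(parent);
	else
		child->ps_vmspace = uvmspace_fork(parent);
}
#endif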
 4002 
 4003 /*
 4004  * uvm_map_hint: return the beginning of the best area suitable for
 4005  * creating a new mapping with "prot" protection.
 4006  */
 4007 vaddr_t
 4008 uvm_map_hint(struct vmspace *vm, vm_prot_t prot, vaddr_t minaddr,
 4009     vaddr_t maxaddr)
 4010 {
 4011         vaddr_t addr;
 4012         vaddr_t spacing;
 4013 
 4014 #ifdef __i386__
 4015         /*
 4016          * If executable skip first two pages, otherwise start
 4017          * after data + heap region.
 4018          */
 4019         if ((prot & PROT_EXEC) != 0 &&
 4020             (vaddr_t)vm->vm_daddr >= I386_MAX_EXE_ADDR) {
 4021                 addr = (PAGE_SIZE*2) +
 4022                     (arc4random() & (I386_MAX_EXE_ADDR / 2 - 1));
 4023                 return (round_page(addr));
 4024         }
 4025 #endif
 4026 
 4027 #if defined (__LP64__)
 4028         spacing = MIN(4UL * 1024 * 1024 * 1024, MAXDSIZ) - 1;
 4029 #else
 4030         spacing = MIN(1 * 1024 * 1024 * 1024, MAXDSIZ) - 1;
 4031 #endif
 4032 
 4033         /*
 4034          * Start malloc/mmap after the brk.
 4035          */
 4036         addr = (vaddr_t)vm->vm_daddr + BRKSIZ;
 4037         addr = MAX(addr, minaddr);
 4038 
 4039         if (addr < maxaddr) {
 4040                 while (spacing > maxaddr - addr)
 4041                         spacing >>= 1;
 4042         }
 4043         addr += arc4random() & spacing;
 4044         return (round_page(addr));
 4045 }
 4046 
 4047 /*
 4048  * uvm_map_submap: punch down part of a map into a submap
 4049  *
 4050  * => only the kernel_map is allowed to be submapped
 4051  * => the purpose of submapping is to break up the locking granularity
 4052  *      of a larger map
 4053  * => the range specified must have been mapped previously with a uvm_map()
 4054  *      call [with uobj==NULL] to create a blank map entry in the main map.
 4055  *      [And it had better still be blank!]
 4056  * => maps which contain submaps should never be copied or forked.
 4057  * => to remove a submap, use uvm_unmap() on the main map
 4058  *      and then uvm_map_deallocate() the submap.
 4059  * => main map must be unlocked.
 4060  * => submap must have been init'd and have a zero reference count.
 4061  *      [need not be locked as we don't actually reference it]
 4062  */
 4063 int
 4064 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
 4065     struct vm_map *submap)
 4066 {
 4067         struct vm_map_entry *entry;
 4068         int result;
 4069 
 4070         if (start > map->max_offset || end > map->max_offset ||
 4071             start < map->min_offset || end < map->min_offset)
 4072                 return EINVAL;
 4073 
 4074         vm_map_lock(map);
 4075 
 4076         if (uvm_map_lookup_entry(map, start, &entry)) {
 4077                 UVM_MAP_CLIP_START(map, entry, start);
 4078                 UVM_MAP_CLIP_END(map, entry, end);
 4079         } else
 4080                 entry = NULL;
 4081 
 4082         if (entry != NULL &&
 4083             entry->start == start && entry->end == end &&
 4084             entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
 4085             !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
 4086                 entry->etype |= UVM_ET_SUBMAP;
 4087                 entry->object.sub_map = submap;
 4088                 entry->offset = 0;
 4089                 uvm_map_reference(submap);
 4090                 result = 0;
 4091         } else
 4092                 result = EINVAL;
 4093 
 4094         vm_map_unlock(map);
 4095         return result;
 4096 }
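
/*
 * Editor's note: illustrative sketch, not part of uvm_map.c.  Following
 * the rules above, [start, end) must already be a blank kernel_map
 * entry (reserved earlier with uvm_map() and uobj == NULL); the submap
 * is then punched in with uvm_map_submap().  The helper name is
 * hypothetical and the initial reservation is omitted.
 */
#ifdef notyet
static struct vm_map *
example_make_submap(vaddr_t start, vaddr_t end, int flags)
{
	struct vm_map *submap;

	submap = uvm_map_create(pmap_kernel(), start, end, flags);
	if (uvm_map_submap(kernel_map, start, end, submap) != 0)
		panic("example_make_submap: range was not a blank entry");
	return submap;
}
#endif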
 4097 
 4098 /*
 4099  * uvm_map_checkprot: check protection in map
 4100  *
 4101  * => must allow specific protection in a fully allocated region.
 4102  * => map must be read or write locked by caller.
 4103  */
 4104 boolean_t
 4105 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
 4106     vm_prot_t protection)
 4107 {
 4108         struct vm_map_entry *entry;
 4109 
 4110         vm_map_assert_anylock(map);
 4111 
 4112         if (start < map->min_offset || end > map->max_offset || start > end)
 4113                 return FALSE;
 4114         if (start == end)
 4115                 return TRUE;
 4116 
 4117         /*
 4118          * Iterate entries.
 4119          */
 4120         for (entry = uvm_map_entrybyaddr(&map->addr, start);
 4121             entry != NULL && entry->start < end;
 4122             entry = RBT_NEXT(uvm_map_addr, entry)) {
 4123                 /* Fail if a hole is found. */
 4124                 if (UVM_ET_ISHOLE(entry) ||
 4125                     (entry->end < end && entry->end != VMMAP_FREE_END(entry)))
 4126                         return FALSE;
 4127 
 4128                 /* Check protection. */
 4129                 if ((entry->protection & protection) != protection)
 4130                         return FALSE;
 4131         }
 4132         return TRUE;
 4133 }
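
/*
 * Editor's sketch (not part of uvm_map.c): the uvm_map_checkprot() walk
 * above, modelled over a plain sorted array instead of the address tree.
 * A gap in coverage fails the check, as does any entry that lacks one of
 * the requested protection bits.  struct ent_sketch and the prot values
 * are invented for the example.
 */
#include <stdint.h>

struct ent_sketch {
	uintptr_t start, end;	/* [start, end) covered by this entry */
	int prot;		/* protection bits, e.g. 0x1 read, 0x2 write */
};

static int
checkprot_sketch(const struct ent_sketch *e, int n, uintptr_t start,
    uintptr_t end, int protection)
{
	uintptr_t cur = start;
	int i;

	for (i = 0; i < n && cur < end; i++) {
		if (e[i].end <= cur)
			continue;		/* entry lies below the range */
		if (e[i].start > cur)
			return 0;		/* hole in the range: fail */
		if ((e[i].prot & protection) != protection)
			return 0;		/* protection too weak: fail */
		cur = e[i].end;			/* advance past this entry */
	}
	return cur >= end;			/* covered the whole range? */
}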
 4134 
 4135 /*
 4136  * uvm_map_create: create map
 4137  */
 4138 vm_map_t
 4139 uvm_map_create(pmap_t pmap, vaddr_t min, vaddr_t max, int flags)
 4140 {
 4141         vm_map_t map;
 4142 
 4143         map = malloc(sizeof *map, M_VMMAP, M_WAITOK);
 4144         uvm_map_setup(map, pmap, min, max, flags);
 4145         return (map);
 4146 }
 4147 
 4148 /*
 4149  * uvm_map_deallocate: drop reference to a map
 4150  *
 4151  * => caller must not lock map
 4152  * => we will zap map if ref count goes to zero
 4153  */
 4154 void
 4155 uvm_map_deallocate(vm_map_t map)
 4156 {
 4157         int c;
 4158         struct uvm_map_deadq dead;
 4159 
 4160         c = atomic_dec_int_nv(&map->ref_count);
 4161         if (c > 0) {
 4162                 return;
 4163         }
 4164 
 4165         /*
 4166          * all references gone.   unmap and free.
 4167          *
 4168          * No lock required: we are only one to access this map.
 4169          */
 4170         TAILQ_INIT(&dead);
 4171         uvm_tree_sanity(map, __FILE__, __LINE__);
 4172         vm_map_lock(map);
 4173         uvm_unmap_remove(map, map->min_offset, map->max_offset, &dead,
 4174             TRUE, FALSE, FALSE);
 4175         vm_map_unlock(map);
 4176         pmap_destroy(map->pmap);
 4177         KASSERT(RBT_EMPTY(uvm_map_addr, &map->addr));
 4178         free(map, M_VMMAP, sizeof *map);
 4179 
 4180         uvm_unmap_detach(&dead, 0);
 4181 }
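
/*
 * Editor's sketch (not part of uvm_map.c): the reference-count discipline
 * of uvm_map_deallocate() above.  The atomic decrement elects exactly one
 * "last" caller, and only that caller tears the object down, which is why
 * the teardown itself needs no lock.  C11 <stdatomic.h> stands in for
 * atomic_dec_int_nv(); the structure is invented for the example.
 */
#include <stdatomic.h>
#include <stdlib.h>

struct refobj_sketch {
	atomic_uint ref_count;
	/* ... payload ... */
};

static void
refobj_release_sketch(struct refobj_sketch *o)
{
	/* fetch_sub returns the old value; old value 1 means we are last */
	if (atomic_fetch_sub(&o->ref_count, 1) != 1)
		return;

	/* sole remaining owner: free without further locking */
	free(o);
}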
 4182 
 4183 /*
 4184  * uvm_map_inherit: set inheritance code for range of addrs in map.
 4185  *
 4186  * => map must be unlocked
 4187  * => note that the inherit code is used during a "fork".  see fork
 4188  *      code for details.
 4189  */
 4190 int
 4191 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
 4192     vm_inherit_t new_inheritance)
 4193 {
 4194         struct vm_map_entry *entry;
 4195 
 4196         switch (new_inheritance) {
 4197         case MAP_INHERIT_NONE:
 4198         case MAP_INHERIT_COPY:
 4199         case MAP_INHERIT_SHARE:
 4200         case MAP_INHERIT_ZERO:
 4201                 break;
 4202         default:
 4203                 return (EINVAL);
 4204         }
 4205 
 4206         if (start > end)
 4207                 return EINVAL;
 4208         start = MAX(start, map->min_offset);
 4209         end = MIN(end, map->max_offset);
 4210         if (start >= end)
 4211                 return 0;
 4212 
 4213         vm_map_lock(map);
 4214 
 4215         entry = uvm_map_entrybyaddr(&map->addr, start);
 4216         if (entry->end > start)
 4217                 UVM_MAP_CLIP_START(map, entry, start);
 4218         else
 4219                 entry = RBT_NEXT(uvm_map_addr, entry);
 4220 
 4221         while (entry != NULL && entry->start < end) {
 4222                 UVM_MAP_CLIP_END(map, entry, end);
 4223                 entry->inheritance = new_inheritance;
 4224                 entry = RBT_NEXT(uvm_map_addr, entry);
 4225         }
 4226 
 4227         vm_map_unlock(map);
 4228         return (0);
 4229 }
 4230 
 4231 #ifdef PMAP_CHECK_COPYIN
 4232 static void inline
 4233 check_copyin_add(struct vm_map *map, vaddr_t start, vaddr_t end)
 4234 {
 4235         if (PMAP_CHECK_COPYIN == 0 ||
 4236             map->check_copyin_count >= UVM_MAP_CHECK_COPYIN_MAX)
 4237                 return;
 4238         map->check_copyin[map->check_copyin_count].start = start;
 4239         map->check_copyin[map->check_copyin_count].end = end;
 4240         membar_producer();
 4241         map->check_copyin_count++;
 4242 }
 4243 
 4244 /* 
 4245  * uvm_map_check_copyin_add: remember regions which are X-only for copyin(),
 4246  * copyinstr(), uiomove(), and others
 4247  *
 4248  * => map must be unlocked
 4249  */
 4250 int
 4251 uvm_map_check_copyin_add(struct vm_map *map, vaddr_t start, vaddr_t end)
 4252 {
 4253         if (start > end)
 4254                 return EINVAL;
 4255         start = MAX(start, map->min_offset);
 4256         end = MIN(end, map->max_offset);
 4257         if (start >= end)
 4258                 return 0;
 4259         check_copyin_add(map, start, end);
 4260         return (0);
 4261 }
 4262 #endif /* PMAP_CHECK_COPYIN */
 4263 
 4264 /* 
 4265  * uvm_map_syscall: permit system calls for range of addrs in map.
 4266  *
 4267  * => map must be unlocked
 4268  */
 4269 int
 4270 uvm_map_syscall(struct vm_map *map, vaddr_t start, vaddr_t end)
 4271 {
 4272         struct vm_map_entry *entry;
 4273 
 4274         if (start > end)
 4275                 return EINVAL;
 4276         start = MAX(start, map->min_offset);
 4277         end = MIN(end, map->max_offset);
 4278         if (start >= end)
 4279                 return 0;
 4280         if (map->flags & VM_MAP_SYSCALL_ONCE)   /* only allowed once */
 4281                 return (EPERM);
 4282 
 4283         vm_map_lock(map);
 4284 
 4285         entry = uvm_map_entrybyaddr(&map->addr, start);
 4286         if (entry->end > start)
 4287                 UVM_MAP_CLIP_START(map, entry, start);
 4288         else
 4289                 entry = RBT_NEXT(uvm_map_addr, entry);
 4290 
 4291         while (entry != NULL && entry->start < end) {
 4292                 UVM_MAP_CLIP_END(map, entry, end);
 4293                 entry->etype |= UVM_ET_SYSCALL;
 4294                 entry = RBT_NEXT(uvm_map_addr, entry);
 4295         }
 4296 
 4297 #ifdef PMAP_CHECK_COPYIN
 4298         check_copyin_add(map, start, end);      /* Add libc's text segment */
 4299 #endif
 4300         map->wserial++;
 4301         map->flags |= VM_MAP_SYSCALL_ONCE;
 4302         vm_map_unlock(map);
 4303         return (0);
 4304 }
 4305 
 4306 /* 
 4307  * uvm_map_immutable: block mapping/mprotect for range of addrs in map.
 4308  *
 4309  * => map must be unlocked
 4310  */
 4311 int
 4312 uvm_map_immutable(struct vm_map *map, vaddr_t start, vaddr_t end, int imut)
 4313 {
 4314         struct vm_map_entry *entry;
 4315 
 4316         if (start > end)
 4317                 return EINVAL;
 4318         start = MAX(start, map->min_offset);
 4319         end = MIN(end, map->max_offset);
 4320         if (start >= end)
 4321                 return 0;
 4322 
 4323         vm_map_lock(map);
 4324 
 4325         entry = uvm_map_entrybyaddr(&map->addr, start);
 4326         if (entry->end > start)
 4327                 UVM_MAP_CLIP_START(map, entry, start);
 4328         else
 4329                 entry = RBT_NEXT(uvm_map_addr, entry);
 4330 
 4331         while (entry != NULL && entry->start < end) {
 4332                 UVM_MAP_CLIP_END(map, entry, end);
 4333                 if (imut)
 4334                         entry->etype |= UVM_ET_IMMUTABLE;
 4335                 else
 4336                         entry->etype &= ~UVM_ET_IMMUTABLE;
 4337                 entry = RBT_NEXT(uvm_map_addr, entry);
 4338         }
 4339 
 4340         map->wserial++;
 4341         vm_map_unlock(map);
 4342         return (0);
 4343 }
 4344 
 4345 /*
 4346  * uvm_map_advice: set advice code for range of addrs in map.
 4347  *
 4348  * => map must be unlocked
 4349  */
 4350 int
 4351 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
 4352 {
 4353         struct vm_map_entry *entry;
 4354 
 4355         switch (new_advice) {
 4356         case MADV_NORMAL:
 4357         case MADV_RANDOM:
 4358         case MADV_SEQUENTIAL:
 4359                 break;
 4360         default:
 4361                 return (EINVAL);
 4362         }
 4363 
 4364         if (start > end)
 4365                 return EINVAL;
 4366         start = MAX(start, map->min_offset);
 4367         end = MIN(end, map->max_offset);
 4368         if (start >= end)
 4369                 return 0;
 4370 
 4371         vm_map_lock(map);
 4372 
 4373         entry = uvm_map_entrybyaddr(&map->addr, start);
 4374         if (entry != NULL && entry->end > start)
 4375                 UVM_MAP_CLIP_START(map, entry, start);
 4376         else if (entry != NULL)
 4377                 entry = RBT_NEXT(uvm_map_addr, entry);
 4378 
 4379         /*
 4380          * XXXJRT: disallow holes?
 4381          */
 4382         while (entry != NULL && entry->start < end) {
 4383                 UVM_MAP_CLIP_END(map, entry, end);
 4384                 entry->advice = new_advice;
 4385                 entry = RBT_NEXT(uvm_map_addr, entry);
 4386         }
 4387 
 4388         vm_map_unlock(map);
 4389         return (0);
 4390 }
 4391 
 4392 /*
 4393  * uvm_map_extract: extract a mapping from a map and put it somewhere
 4394  * in the kernel_map, setting protection to max_prot.
 4395  *
 4396  * => map should be unlocked (we will write lock it and kernel_map)
 4397  * => returns 0 on success, error code otherwise
 4398  * => start must be page aligned
 4399  * => len must be page sized
 4400  * => flags:
 4401  *      UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
 4402  * Mappings are QREF's.
 4403  */
 4404 int
 4405 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
 4406     vaddr_t *dstaddrp, int flags)
 4407 {
 4408         struct uvm_map_deadq dead;
 4409         struct vm_map_entry *first, *entry, *newentry, *tmp1, *tmp2;
 4410         vaddr_t dstaddr;
 4411         vaddr_t end;
 4412         vaddr_t cp_start;
 4413         vsize_t cp_len, cp_off;
 4414         int error;
 4415 
 4416         TAILQ_INIT(&dead);
 4417         end = start + len;
 4418 
 4419         /*
 4420          * Sanity check on the parameters.
 4421          * Also, since the mapping may not contain gaps, error out if the
 4422          * mapped area is not in source map.
 4423          */
 4424         if ((start & (vaddr_t)PAGE_MASK) != 0 ||
 4425             (end & (vaddr_t)PAGE_MASK) != 0 || end < start)
 4426                 return EINVAL;
 4427         if (start < srcmap->min_offset || end > srcmap->max_offset)
 4428                 return EINVAL;
 4429 
 4430         /* Initialize dead entries. Handle len == 0 case. */
 4431         if (len == 0)
 4432                 return 0;
 4433 
 4434         /* Acquire lock on srcmap. */
 4435         vm_map_lock(srcmap);
 4436 
 4437         /* Lock srcmap, lookup first and last entry in <start,len>. */
 4438         first = uvm_map_entrybyaddr(&srcmap->addr, start);
 4439 
 4440         /* Check that the range is contiguous. */
 4441         for (entry = first; entry != NULL && entry->end < end;
 4442             entry = RBT_NEXT(uvm_map_addr, entry)) {
 4443                 if (VMMAP_FREE_END(entry) != entry->end ||
 4444                     UVM_ET_ISHOLE(entry)) {
 4445                         error = EINVAL;
 4446                         goto fail;
 4447                 }
 4448         }
 4449         if (entry == NULL || UVM_ET_ISHOLE(entry)) {
 4450                 error = EINVAL;
 4451                 goto fail;
 4452         }
 4453 
 4454         /*
 4455          * Handle need-copy flag.
 4456          */
 4457         for (entry = first; entry != NULL && entry->start < end;
 4458             entry = RBT_NEXT(uvm_map_addr, entry)) {
 4459                 if (UVM_ET_ISNEEDSCOPY(entry))
 4460                         amap_copy(srcmap, entry, M_NOWAIT,
 4461                             UVM_ET_ISSTACK(entry) ? FALSE : TRUE, start, end);
 4462                 if (UVM_ET_ISNEEDSCOPY(entry)) {
 4463                         /*
 4464                          * amap_copy failure
 4465                          */
 4466                         error = ENOMEM;
 4467                         goto fail;
 4468                 }
 4469         }
 4470 
 4471         /* Lock destination map (kernel_map). */
 4472         vm_map_lock(kernel_map);
 4473 
 4474         if (uvm_map_findspace(kernel_map, &tmp1, &tmp2, &dstaddr, len,
 4475             MAX(PAGE_SIZE, PMAP_PREFER_ALIGN()), PMAP_PREFER_OFFSET(start),
 4476             PROT_NONE, 0) != 0) {
 4477                 error = ENOMEM;
 4478                 goto fail2;
 4479         }
 4480         *dstaddrp = dstaddr;
 4481 
 4482         /*
 4483          * We now have srcmap and kernel_map locked.
 4484          * dstaddr contains the destination offset in kernel_map.
 4485          */
 4486         /* step 1: start looping through map entries, performing extraction. */
 4487         for (entry = first; entry != NULL && entry->start < end;
 4488             entry = RBT_NEXT(uvm_map_addr, entry)) {
 4489                 KDASSERT(!UVM_ET_ISNEEDSCOPY(entry));
 4490                 if (UVM_ET_ISHOLE(entry))
 4491                         continue;
 4492 
 4493                 /* Calculate uvm_mapent_clone parameters. */
 4494                 cp_start = entry->start;
 4495                 if (cp_start < start) {
 4496                         cp_off = start - cp_start;
 4497                         cp_start = start;
 4498                 } else
 4499                         cp_off = 0;
 4500                 cp_len = MIN(entry->end, end) - cp_start;
 4501 
 4502                 newentry = uvm_mapent_clone(kernel_map,
 4503                     cp_start - start + dstaddr, cp_len, cp_off,
 4504                     entry->protection, entry->max_protection,
 4505                     entry, &dead, flags, AMAP_SHARED | AMAP_REFALL);
 4506                 if (newentry == NULL) {
 4507                         error = ENOMEM;
 4508                         goto fail2_unmap;
 4509                 }
 4510                 kernel_map->size += cp_len;
 4511 
 4512                 /* Figure out the best protection */ 
 4513                 if ((flags & UVM_EXTRACT_FIXPROT) &&
 4514                     newentry->protection != PROT_NONE)
 4515                         newentry->protection = newentry->max_protection;
 4516                 newentry->protection &= ~PROT_EXEC;
 4517 
 4518                 /*
 4519                  * Step 2: perform pmap copy.
 4520                  * (Doing this in the loop saves one RB traversal.)
 4521                  */
 4522                 pmap_copy(kernel_map->pmap, srcmap->pmap,
 4523                     cp_start - start + dstaddr, cp_len, cp_start);
 4524         }
 4525         pmap_update(kernel_map->pmap);
 4526 
 4527         error = 0;
 4528 
 4529         /* Unmap copied entries on failure. */
 4530 fail2_unmap:
 4531         if (error) {
 4532                 uvm_unmap_remove(kernel_map, dstaddr, dstaddr + len, &dead,
 4533                     FALSE, TRUE, FALSE);
 4534         }
 4535 
 4536         /* Release maps, release dead entries. */
 4537 fail2:
 4538         vm_map_unlock(kernel_map);
 4539 
 4540 fail:
 4541         vm_map_unlock(srcmap);
 4542 
 4543         uvm_unmap_detach(&dead, 0);
 4544 
 4545         return error;
 4546 }
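
/*
 * Editor's sketch (not part of uvm_map.c): the per-entry clone arithmetic
 * from the extraction loop above.  The source entry is clamped to the
 * requested [start, end) window, cp_off records how far into the entry the
 * window begins, and the clone is placed at the same relative offset from
 * dstaddr.  Types and names are invented for the example.
 */
#include <stddef.h>
#include <stdint.h>

struct clone_piece_sketch {
	uintptr_t dst;		/* destination address of this piece */
	uintptr_t off;		/* offset of the piece within the source entry */
	size_t len;		/* length of the piece */
};

static struct clone_piece_sketch
clone_piece_params_sketch(uintptr_t ent_start, uintptr_t ent_end,
    uintptr_t start, uintptr_t end, uintptr_t dstaddr)
{
	struct clone_piece_sketch p;
	uintptr_t cp_start = ent_start;

	p.off = 0;
	if (cp_start < start) {
		p.off = start - cp_start;	/* skip the part below the window */
		cp_start = start;
	}
	p.len = (ent_end < end ? ent_end : end) - cp_start;
	p.dst = cp_start - start + dstaddr;	/* keep relative placement */
	return p;
}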
 4547 
 4548 /*
 4549  * uvm_map_clean: clean out a map range
 4550  *
 4551  * => valid flags:
 4552  *   if (flags & PGO_CLEANIT): dirty pages are cleaned first
 4553  *   if (flags & PGO_SYNCIO): dirty pages are written synchronously
 4554  *   if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
 4555  *   if (flags & PGO_FREE): any cached pages are freed after clean
 4556  * => returns an error if any part of the specified range isn't mapped
 4557  * => never a need to flush amap layer since the anonymous memory has
 4558  *      no permanent home, but may deactivate pages there
 4559  * => called from sys_msync() and sys_madvise()
 4560  * => caller must not write-lock map (read OK).
 4561  * => we may sleep while cleaning if SYNCIO [with map read-locked]
 4562  */
 4563 
 4564 int
 4565 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
 4566 {
 4567         struct vm_map_entry *first, *entry;
 4568         struct vm_amap *amap;
 4569         struct vm_anon *anon;
 4570         struct vm_page *pg;
 4571         struct uvm_object *uobj;
 4572         vaddr_t cp_start, cp_end;
 4573         int refs;
 4574         int error;
 4575         boolean_t rv;
 4576 
 4577         KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
 4578             (PGO_FREE|PGO_DEACTIVATE));
 4579 
 4580         if (start > end || start < map->min_offset || end > map->max_offset)
 4581                 return EINVAL;
 4582 
 4583         vm_map_lock_read(map);
 4584         first = uvm_map_entrybyaddr(&map->addr, start);
 4585 
 4586         /* Make a first pass to check for holes. */
 4587         for (entry = first; entry != NULL && entry->start < end;
 4588             entry = RBT_NEXT(uvm_map_addr, entry)) {
 4589                 if (UVM_ET_ISSUBMAP(entry)) {
 4590                         vm_map_unlock_read(map);
 4591                         return EINVAL;
 4592                 }
 4593                 if (UVM_ET_ISSUBMAP(entry) ||
 4594                     UVM_ET_ISHOLE(entry) ||
 4595                     (entry->end < end &&
 4596                     VMMAP_FREE_END(entry) != entry->end)) {
 4597                         vm_map_unlock_read(map);
 4598                         return EFAULT;
 4599                 }
 4600         }
 4601 
 4602         error = 0;
 4603         for (entry = first; entry != NULL && entry->start < end;
 4604             entry = RBT_NEXT(uvm_map_addr, entry)) {
 4605                 amap = entry->aref.ar_amap;     /* top layer */
 4606                 if (UVM_ET_ISOBJ(entry))
 4607                         uobj = entry->object.uvm_obj;
 4608                 else
 4609                         uobj = NULL;
 4610 
 4611                 /*
 4612                  * No amap cleaning necessary if:
 4613                  *  - there's no amap
 4614                  *  - we're not deactivating or freeing pages.
 4615                  */
 4616                 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
 4617                         goto flush_object;
 4618 
 4619                 cp_start = MAX(entry->start, start);
 4620                 cp_end = MIN(entry->end, end);
 4621 
 4622                 amap_lock(amap);
 4623                 for (; cp_start != cp_end; cp_start += PAGE_SIZE) {
 4624                         anon = amap_lookup(&entry->aref,
 4625                             cp_start - entry->start);
 4626                         if (anon == NULL)
 4627                                 continue;
 4628 
 4629                         KASSERT(anon->an_lock == amap->am_lock);
 4630                         pg = anon->an_page;
 4631                         if (pg == NULL) {
 4632                                 continue;
 4633                         }
 4634                         KASSERT(pg->pg_flags & PQ_ANON);
 4635 
 4636                         switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
 4637                         /*
 4638                          * XXX In these first 3 cases, we always just
 4639                          * XXX deactivate the page.  We may want to
 4640                          * XXX handle the different cases more
 4641                          * XXX specifically, in the future.
 4642                          */
 4643                         case PGO_CLEANIT|PGO_FREE:
 4644                         case PGO_CLEANIT|PGO_DEACTIVATE:
 4645                         case PGO_DEACTIVATE:
 4646 deactivate_it:
 4647                                 /* skip the page if it's wired */
 4648                                 if (pg->wire_count != 0)
 4649                                         break;
 4650 
 4651                                 uvm_lock_pageq();
 4652 
 4653                                 KASSERT(pg->uanon == anon);
 4654 
 4655                                 /* zap all mappings for the page. */
 4656                                 pmap_page_protect(pg, PROT_NONE);
 4657 
 4658                                 /* ...and deactivate the page. */
 4659                                 uvm_pagedeactivate(pg);
 4660 
 4661                                 uvm_unlock_pageq();
 4662                                 break;
 4663                         case PGO_FREE:
 4664                                 /*
 4665                                  * If there are multiple references to
 4666                                  * the amap, just deactivate the page.
 4667                                  */
 4668                                 if (amap_refs(amap) > 1)
 4669                                         goto deactivate_it;
 4670 
 4671                                 /* XXX skip the page if it's wired */
 4672                                 if (pg->wire_count != 0) {
 4673                                         break;
 4674                                 }
 4675                                 amap_unadd(&entry->aref,
 4676                                     cp_start - entry->start);
 4677                                 refs = --anon->an_ref;
 4678                                 if (refs == 0)
 4679                                         uvm_anfree(anon);
 4680                                 break;
 4681                         default:
 4682                                 panic("uvm_map_clean: weird flags");
 4683                         }
 4684                 }
 4685                 amap_unlock(amap);
 4686 
 4687 flush_object:
 4688                 cp_start = MAX(entry->start, start);
 4689                 cp_end = MIN(entry->end, end);
 4690 
 4691                 /*
 4692                  * flush pages if we've got a valid backing object.
 4693                  *
 4694                  * Don't PGO_FREE if we don't have write permission
 4695                  * and don't flush if this is a copy-on-write object
 4696                  * since we can't know our permissions on it.
 4697                  */
 4698                 if (uobj != NULL &&
 4699                     ((flags & PGO_FREE) == 0 ||
 4700                      ((entry->max_protection & PROT_WRITE) != 0 &&
 4701                       (entry->etype & UVM_ET_COPYONWRITE) == 0))) {
 4702                         rw_enter(uobj->vmobjlock, RW_WRITE);
 4703                         rv = uobj->pgops->pgo_flush(uobj,
 4704                             cp_start - entry->start + entry->offset,
 4705                             cp_end - entry->start + entry->offset, flags);
 4706                         rw_exit(uobj->vmobjlock);
 4707 
 4708                         if (rv == FALSE)
 4709                                 error = EFAULT;
 4710                 }
 4711         }
 4712 
 4713         vm_map_unlock_read(map);
 4714         return error;
 4715 }
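
/*
 * Editor's sketch (not part of uvm_map.c): the per-anon-page decision made
 * by uvm_map_clean() above.  Wired pages are always skipped, PGO_FREE falls
 * back to deactivation when the amap is shared, and every other valid
 * clean/deactivate combination simply deactivates.  Flag values and names
 * are invented; the invalid PGO_FREE|PGO_DEACTIVATE combination, rejected
 * earlier by KASSERT(), is not modelled.
 */
#define CLEAN_SK	0x1	/* stand-in for PGO_CLEANIT */
#define FREE_SK		0x2	/* stand-in for PGO_FREE */
#define DEACT_SK	0x4	/* stand-in for PGO_DEACTIVATE */

enum page_action_sketch { ACT_SKIP, ACT_DEACTIVATE, ACT_FREE };

static enum page_action_sketch
clean_page_action_sketch(int flags, int wired, int amap_refs)
{
	if (wired)
		return ACT_SKIP;		/* never touch wired pages */

	if ((flags & (CLEAN_SK|FREE_SK|DEACT_SK)) == FREE_SK) {
		if (amap_refs > 1)
			return ACT_DEACTIVATE;	/* shared amap: keep the page */
		return ACT_FREE;
	}

	/* CLEANIT|FREE, CLEANIT|DEACTIVATE and DEACTIVATE all deactivate */
	return ACT_DEACTIVATE;
}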
 4716 
 4717 /*
 4718  * UVM_MAP_CLIP_END implementation
 4719  */
 4720 void
 4721 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
 4722 {
 4723         struct vm_map_entry *tmp;
 4724 
 4725         KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
 4726         tmp = uvm_mapent_alloc(map, 0);
 4727 
 4728         /* Invoke splitentry. */
 4729         uvm_map_splitentry(map, entry, tmp, addr);
 4730 }
 4731 
 4732 /*
 4733  * UVM_MAP_CLIP_START implementation
 4734  *
 4735  * Clippers are required to not change the pointers to the entry they are
 4736  * clipping on.
 4737  * Since uvm_map_splitentry turns the original entry into the lowest
 4738  * entry (address wise) we do a swap between the new entry and the original
 4739  * entry, prior to calling uvm_map_splitentry.
 4740  */
 4741 void
 4742 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry, vaddr_t addr)
 4743 {
 4744         struct vm_map_entry *tmp;
 4745         struct uvm_addr_state *free;
 4746 
 4747         /* Unlink original. */
 4748         free = uvm_map_uaddr_e(map, entry);
 4749         uvm_mapent_free_remove(map, free, entry);
 4750         uvm_mapent_addr_remove(map, entry);
 4751 
 4752         /* Copy entry. */
 4753         KASSERT(entry->start < addr && VMMAP_FREE_END(entry) > addr);
 4754         tmp = uvm_mapent_alloc(map, 0);
 4755         uvm_mapent_copy(entry, tmp);
 4756 
 4757         /* Put new entry in place of original entry. */
 4758         uvm_mapent_addr_insert(map, tmp);
 4759         uvm_mapent_free_insert(map, free, tmp);
 4760 
 4761         /* Invoke splitentry. */
 4762         uvm_map_splitentry(map, tmp, entry, addr);
 4763 }
 4764 
 4765 /*
 4766  * Boundary fixer.
 4767  */
 4768 static inline vaddr_t uvm_map_boundfix(vaddr_t, vaddr_t, vaddr_t);
 4769 static inline vaddr_t
 4770 uvm_map_boundfix(vaddr_t min, vaddr_t max, vaddr_t bound)
 4771 {
 4772         return (min < bound && max > bound) ? bound : max;
 4773 }
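
/*
 * Editor's sketch (not part of uvm_map.c): a few spot checks of the
 * boundary fixer above -- a boundary strictly inside (min, max) clips max
 * down to it, any other boundary leaves max untouched.  The demo reuses
 * the file's KASSERT() convention; the addresses are arbitrary.
 */
static void __attribute__((__unused__))
uvm_map_boundfix_demo_sketch(void)
{
	/* boundary inside the range: result is the boundary */
	KASSERT(uvm_map_boundfix(0x1000, 0x5000, 0x3000) == 0x3000);
	/* boundary at min, at max or beyond: result is max */
	KASSERT(uvm_map_boundfix(0x1000, 0x5000, 0x1000) == 0x5000);
	KASSERT(uvm_map_boundfix(0x1000, 0x5000, 0x5000) == 0x5000);
	KASSERT(uvm_map_boundfix(0x1000, 0x5000, 0x8000) == 0x5000);
}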
 4774 
 4775 /*
 4776  * Choose free list based on address at start of free space.
 4777  *
 4778  * The uvm_addr_state returned contains addr and is the first of:
 4779  * - uaddr_exe
 4780  * - uaddr_brk_stack
 4781  * - uaddr_any
 4782  */
 4783 struct uvm_addr_state*
 4784 uvm_map_uaddr(struct vm_map *map, vaddr_t addr)
 4785 {
 4786         struct uvm_addr_state *uaddr;
 4787         int i;
 4788 
 4789         /* Special case the first page, to prevent mmap from returning 0. */
 4790         if (addr < VMMAP_MIN_ADDR)
 4791                 return NULL;
 4792 
 4793         /* Upper bound for kernel maps at uvm_maxkaddr. */
 4794         if ((map->flags & VM_MAP_ISVMSPACE) == 0) {
 4795                 if (addr >= uvm_maxkaddr)
 4796                         return NULL;
 4797         }
 4798 
 4799         /* Is the address inside the exe-only map? */
 4800         if (map->uaddr_exe != NULL && addr >= map->uaddr_exe->uaddr_minaddr &&
 4801             addr < map->uaddr_exe->uaddr_maxaddr)
 4802                 return map->uaddr_exe;
 4803 
 4804         /* Check if the space falls inside brk/stack area. */
 4805         if ((addr >= map->b_start && addr < map->b_end) ||
 4806             (addr >= map->s_start && addr < map->s_end)) {
 4807                 if (map->uaddr_brk_stack != NULL &&
 4808                     addr >= map->uaddr_brk_stack->uaddr_minaddr &&
 4809                     addr < map->uaddr_brk_stack->uaddr_maxaddr) {
 4810                         return map->uaddr_brk_stack;
 4811                 } else
 4812                         return NULL;
 4813         }
 4814 
 4815         /*
 4816          * Check the other selectors.
 4817          *
 4818          * These selectors are only marked as the owner, if they have insert
 4819          * functions.
 4820          */
 4821         for (i = 0; i < nitems(map->uaddr_any); i++) {
 4822                 uaddr = map->uaddr_any[i];
 4823                 if (uaddr == NULL)
 4824                         continue;
 4825                 if (uaddr->uaddr_functions->uaddr_free_insert == NULL)
 4826                         continue;
 4827 
 4828                 if (addr >= uaddr->uaddr_minaddr &&
 4829                     addr < uaddr->uaddr_maxaddr)
 4830                         return uaddr;
 4831         }
 4832 
 4833         return NULL;
 4834 }
 4835 
 4836 /*
 4837  * Choose free list based on address at start of free space.
 4838  *
 4839  * The uvm_addr_state returned contains addr and is the first of:
 4840  * - uaddr_exe
 4841  * - uaddr_brk_stack
 4842  * - uaddr_any
 4843  */
 4844 struct uvm_addr_state*
 4845 uvm_map_uaddr_e(struct vm_map *map, struct vm_map_entry *entry)
 4846 {
 4847         return uvm_map_uaddr(map, VMMAP_FREE_START(entry));
 4848 }
 4849 
 4850 /*
 4851  * Returns the first free-memory boundary that is crossed by [min-max].
 4852  */
 4853 vsize_t
 4854 uvm_map_boundary(struct vm_map *map, vaddr_t min, vaddr_t max)
 4855 {
 4856         struct uvm_addr_state   *uaddr;
 4857         int                      i;
 4858 
 4859         /* Never return first page. */
 4860         max = uvm_map_boundfix(min, max, VMMAP_MIN_ADDR);
 4861 
 4862         /* Treat the maxkaddr special, if the map is a kernel_map. */
 4863         if ((map->flags & VM_MAP_ISVMSPACE) == 0)
 4864                 max = uvm_map_boundfix(min, max, uvm_maxkaddr);
 4865 
 4866         /* Check for exe-only boundaries. */
 4867         if (map->uaddr_exe != NULL) {
 4868                 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_minaddr);
 4869                 max = uvm_map_boundfix(min, max, map->uaddr_exe->uaddr_maxaddr);
 4870         }
 4871 
 4872         /* Check for brk/stack boundaries. */
 4873         if (map->uaddr_brk_stack != NULL) {
 4874                 max = uvm_map_boundfix(min, max,
 4875                     map->uaddr_brk_stack->uaddr_minaddr);
 4876                 max = uvm_map_boundfix(min, max,
 4877                     map->uaddr_brk_stack->uaddr_maxaddr);
 4878         }
 4879 
 4880         /* Check other boundaries. */
 4881         for (i = 0; i < nitems(map->uaddr_any); i++) {
 4882                 uaddr = map->uaddr_any[i];
 4883                 if (uaddr != NULL) {
 4884                         max = uvm_map_boundfix(min, max, uaddr->uaddr_minaddr);
 4885                         max = uvm_map_boundfix(min, max, uaddr->uaddr_maxaddr);
 4886                 }
 4887         }
 4888 
 4889         /* Boundaries at stack and brk() area. */
 4890         max = uvm_map_boundfix(min, max, map->s_start);
 4891         max = uvm_map_boundfix(min, max, map->s_end);
 4892         max = uvm_map_boundfix(min, max, map->b_start);
 4893         max = uvm_map_boundfix(min, max, map->b_end);
 4894 
 4895         return max;
 4896 }
 4897 
 4898 /*
 4899  * Update map allocation start and end addresses from proc vmspace.
 4900  */
 4901 void
 4902 uvm_map_vmspace_update(struct vm_map *map,
 4903     struct uvm_map_deadq *dead, int flags)
 4904 {
 4905         struct vmspace *vm;
 4906         vaddr_t b_start, b_end, s_start, s_end;
 4907 
 4908         KASSERT(map->flags & VM_MAP_ISVMSPACE);
 4909         KASSERT(offsetof(struct vmspace, vm_map) == 0);
 4910 
 4911         /*
 4912          * Derive actual allocation boundaries from vmspace.
 4913          */
 4914         vm = (struct vmspace *)map;
 4915         b_start = (vaddr_t)vm->vm_daddr;
 4916         b_end   = b_start + BRKSIZ;
 4917         s_start = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
 4918         s_end   = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
 4919 #ifdef DIAGNOSTIC
 4920         if ((b_start & (vaddr_t)PAGE_MASK) != 0 ||
 4921             (b_end & (vaddr_t)PAGE_MASK) != 0 ||
 4922             (s_start & (vaddr_t)PAGE_MASK) != 0 ||
 4923             (s_end & (vaddr_t)PAGE_MASK) != 0) {
 4924                 panic("uvm_map_vmspace_update: vmspace %p invalid bounds: "
 4925                     "b=0x%lx-0x%lx s=0x%lx-0x%lx",
 4926                     vm, b_start, b_end, s_start, s_end);
 4927         }
 4928 #endif
 4929 
 4930         if (__predict_true(map->b_start == b_start && map->b_end == b_end &&
 4931             map->s_start == s_start && map->s_end == s_end))
 4932                 return;
 4933 
 4934         uvm_map_freelist_update(map, dead, b_start, b_end,
 4935             s_start, s_end, flags);
 4936 }
 4937 
 4938 /*
 4939  * Grow kernel memory.
 4940  *
 4941  * This function is only called for kernel maps when an allocation fails.
 4942  *
 4943  * If the map has a gap that is large enough to accommodate alloc_sz, this
 4944  * function will make sure map->free will include it.
 4945  */
 4946 void
 4947 uvm_map_kmem_grow(struct vm_map *map, struct uvm_map_deadq *dead,
 4948     vsize_t alloc_sz, int flags)
 4949 {
 4950         vsize_t sz;
 4951         vaddr_t end;
 4952         struct vm_map_entry *entry;
 4953 
 4954         /* Kernel memory only. */
 4955         KASSERT((map->flags & VM_MAP_ISVMSPACE) == 0);
 4956         /* Destroy free list. */
 4957         uvm_map_freelist_update_clear(map, dead);
 4958 
 4959         /* Include the guard page in the hard minimum requirement of alloc_sz. */
 4960         if (map->flags & VM_MAP_GUARDPAGES)
 4961                 alloc_sz += PAGE_SIZE;
 4962 
 4963         /*
 4964          * Grow by ALLOCMUL * alloc_sz, but at least VM_MAP_KSIZE_DELTA.
 4965          *
 4966          * Don't handle the case where the multiplication overflows:
 4967          * if that happens, the allocation is probably too big anyway.
 4968          */
 4969         sz = MAX(VM_MAP_KSIZE_ALLOCMUL * alloc_sz, VM_MAP_KSIZE_DELTA);
 4970 
 4971         /*
 4972          * Walk forward until a gap large enough for alloc_sz shows up.
 4973          *
 4974          * We assume the kernel map has no boundaries.
 4975          * uvm_maxkaddr may be zero.
 4976          */
 4977         end = MAX(uvm_maxkaddr, map->min_offset);
 4978         entry = uvm_map_entrybyaddr(&map->addr, end);
 4979         while (entry && entry->fspace < alloc_sz)
 4980                 entry = RBT_NEXT(uvm_map_addr, entry);
 4981         if (entry) {
 4982                 end = MAX(VMMAP_FREE_START(entry), end);
 4983                 end += MIN(sz, map->max_offset - end);
 4984         } else
 4985                 end = map->max_offset;
 4986 
 4987         /* Reserve pmap entries. */
 4988 #ifdef PMAP_GROWKERNEL
 4989         uvm_maxkaddr = pmap_growkernel(end);
 4990 #else
 4991         uvm_maxkaddr = MAX(uvm_maxkaddr, end);
 4992 #endif
 4993 
 4994         /* Rebuild free list. */
 4995         uvm_map_freelist_update_refill(map, flags);
 4996 }
 4997 
 4998 /*
 4999  * Freelist update subfunction: unlink all entries from freelists.
 5000  */
 5001 void
 5002 uvm_map_freelist_update_clear(struct vm_map *map, struct uvm_map_deadq *dead)
 5003 {
 5004         struct uvm_addr_state *free;
 5005         struct vm_map_entry *entry, *prev, *next;
 5006 
 5007         prev = NULL;
 5008         for (entry = RBT_MIN(uvm_map_addr, &map->addr); entry != NULL;
 5009             entry = next) {
 5010                 next = RBT_NEXT(uvm_map_addr, entry);
 5011 
 5012                 free = uvm_map_uaddr_e(map, entry);
 5013                 uvm_mapent_free_remove(map, free, entry);
 5014 
 5015                 if (prev != NULL && entry->start == entry->end) {
 5016                         prev->fspace += VMMAP_FREE_END(entry) - entry->end;
 5017                         uvm_mapent_addr_remove(map, entry);
 5018                         DEAD_ENTRY_PUSH(dead, entry);
 5019                 } else
 5020                         prev = entry;
 5021         }
 5022 }
 5023 
 5024 /*
 5025  * Freelist update subfunction: refill the freelists with entries.
 5026  */
 5027 void
 5028 uvm_map_freelist_update_refill(struct vm_map *map, int flags)
 5029 {
 5030         struct vm_map_entry *entry;
 5031         vaddr_t min, max;
 5032 
 5033         RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
 5034                 min = VMMAP_FREE_START(entry);
 5035                 max = VMMAP_FREE_END(entry);
 5036                 entry->fspace = 0;
 5037 
 5038                 entry = uvm_map_fix_space(map, entry, min, max, flags);
 5039         }
 5040 
 5041         uvm_tree_sanity(map, __FILE__, __LINE__);
 5042 }
 5043 
 5044 /*
 5045  * Change {a,b}_{start,end} allocation ranges and associated free lists.
 5046  */
 5047 void
 5048 uvm_map_freelist_update(struct vm_map *map, struct uvm_map_deadq *dead,
 5049     vaddr_t b_start, vaddr_t b_end, vaddr_t s_start, vaddr_t s_end, int flags)
 5050 {
 5051         KDASSERT(b_end >= b_start && s_end >= s_start);
 5052         vm_map_assert_wrlock(map);
 5053 
 5054         /* Clear all free lists. */
 5055         uvm_map_freelist_update_clear(map, dead);
 5056 
 5057         /* Apply new bounds. */
 5058         map->b_start = b_start;
 5059         map->b_end   = b_end;
 5060         map->s_start = s_start;
 5061         map->s_end   = s_end;
 5062 
 5063         /* Refill free lists. */
 5064         uvm_map_freelist_update_refill(map, flags);
 5065 }
 5066 
 5067 /*
 5068  * Assign a uvm_addr_state to the specified pointer in vm_map.
 5069  *
 5070  * May sleep.
 5071  */
 5072 void
 5073 uvm_map_set_uaddr(struct vm_map *map, struct uvm_addr_state **which,
 5074     struct uvm_addr_state *newval)
 5075 {
 5076         struct uvm_map_deadq dead;
 5077 
 5078         /* Pointer which must be in this map. */
 5079         KASSERT(which != NULL);
 5080         KASSERT((void*)map <= (void*)(which) &&
 5081             (void*)(which) < (void*)(map + 1));
 5082 
 5083         vm_map_lock(map);
 5084         TAILQ_INIT(&dead);
 5085         uvm_map_freelist_update_clear(map, &dead);
 5086 
 5087         uvm_addr_destroy(*which);
 5088         *which = newval;
 5089 
 5090         uvm_map_freelist_update_refill(map, 0);
 5091         vm_map_unlock(map);
 5092         uvm_unmap_detach(&dead, 0);
 5093 }
 5094 
 5095 /*
 5096  * Correct space insert.
 5097  *
 5098  * Entry must not be on any freelist.
 5099  */
 5100 struct vm_map_entry*
 5101 uvm_map_fix_space(struct vm_map *map, struct vm_map_entry *entry,
 5102     vaddr_t min, vaddr_t max, int flags)
 5103 {
 5104         struct uvm_addr_state   *free, *entfree;
 5105         vaddr_t                  lmax;
 5106 
 5107         KASSERT(entry == NULL || (entry->etype & UVM_ET_FREEMAPPED) == 0);
 5108         KDASSERT(min <= max);
 5109         KDASSERT((entry != NULL && VMMAP_FREE_END(entry) == min) ||
 5110             min == map->min_offset);
 5111 
 5112         UVM_MAP_REQ_WRITE(map);
 5113 
 5114         /*
 5115          * During the function, entfree will always point at the uaddr state
 5116          * for entry.
 5117          */
 5118         entfree = (entry == NULL ? NULL :
 5119             uvm_map_uaddr_e(map, entry));
 5120 
 5121         while (min != max) {
 5122                 /* Claim guard page for entry. */
 5123                 if ((map->flags & VM_MAP_GUARDPAGES) && entry != NULL &&
 5124                     VMMAP_FREE_END(entry) == entry->end &&
 5125                     entry->start != entry->end) {
 5126                         if (max - min == 2 * PAGE_SIZE) {
 5127                                 /*
 5128                                  * If the free-space gap is exactly 2 pages,
 5129                                  * we make the guard 2 pages instead of 1.
 5130                                  * Because in a guarded map, an area needs
 5131                                  * at least 2 pages to allocate from:
 5132                                  * one page for the allocation and one for
 5133                                  * the guard.
 5134                                  */
 5135                                 entry->guard = 2 * PAGE_SIZE;
 5136                                 min = max;
 5137                         } else {
 5138                                 entry->guard = PAGE_SIZE;
 5139                                 min += PAGE_SIZE;
 5140                         }
 5141                         continue;
 5142                 }
 5143 
 5144                 /*
 5145                  * Handle the case where entry has a 2-page guard, but the
 5146                  * space after entry is freed.
 5147                  */
 5148                 if (entry != NULL && entry->fspace == 0 &&
 5149                     entry->guard > PAGE_SIZE) {
 5150                         entry->guard = PAGE_SIZE;
 5151                         min = VMMAP_FREE_START(entry);
 5152                 }
 5153 
 5154                 lmax = uvm_map_boundary(map, min, max);
 5155                 free = uvm_map_uaddr(map, min);
 5156 
 5157                 /*
 5158                  * Entries are merged if they point at the same uvm_free().
 5159                  * Exception to that rule: if min == uvm_maxkaddr, a new
 5160                  * entry is started regardless (otherwise the allocators
 5161                  * will get confused).
 5162                  */
 5163                 if (entry != NULL && free == entfree &&
 5164                     !((map->flags & VM_MAP_ISVMSPACE) == 0 &&
 5165                     min == uvm_maxkaddr)) {
 5166                         KDASSERT(VMMAP_FREE_END(entry) == min);
 5167                         entry->fspace += lmax - min;
 5168                 } else {
 5169                         /*
 5170                          * Commit entry to free list: nothing more will
 5171                          * be added to it.
 5172                          * We'll start a new entry and add to that entry
 5173                          * instead.
 5174                          */
 5175                         if (entry != NULL)
 5176                                 uvm_mapent_free_insert(map, entfree, entry);
 5177 
 5178                         /* New entry for new uaddr. */
 5179                         entry = uvm_mapent_alloc(map, flags);
 5180                         KDASSERT(entry != NULL);
 5181                         entry->end = entry->start = min;
 5182                         entry->guard = 0;
 5183                         entry->fspace = lmax - min;
 5184                         entry->object.uvm_obj = NULL;
 5185                         entry->offset = 0;
 5186                         entry->etype = 0;
 5187                         entry->protection = entry->max_protection = 0;
 5188                         entry->inheritance = 0;
 5189                         entry->wired_count = 0;
 5190                         entry->advice = 0;
 5191                         entry->aref.ar_pageoff = 0;
 5192                         entry->aref.ar_amap = NULL;
 5193                         uvm_mapent_addr_insert(map, entry);
 5194 
 5195                         entfree = free;
 5196                 }
 5197 
 5198                 min = lmax;
 5199         }
 5200         /* Finally put entry on the uaddr state. */
 5201         if (entry != NULL)
 5202                 uvm_mapent_free_insert(map, entfree, entry);
 5203 
 5204         return entry;
 5205 }
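
/*
 * Editor's sketch (not part of uvm_map.c): the guard sizing rule from
 * uvm_map_fix_space() above for VM_MAP_GUARDPAGES maps.  A gap of exactly
 * two pages is consumed entirely as guard, because a leftover one-page
 * hole could never hold an allocation plus its own guard; otherwise a
 * single guard page is claimed.  Names and the 4 KB page size are
 * assumptions for the example.
 */
#include <stdint.h>

#define GPAGE_SKETCH	4096UL

/* Returns the guard size; *consumedp is how much of the gap it swallows. */
static uintptr_t
guard_size_sketch(uintptr_t gap, uintptr_t *consumedp)
{
	if (gap == 2 * GPAGE_SKETCH) {
		*consumedp = gap;		/* take the whole 2-page gap */
		return 2 * GPAGE_SKETCH;
	}
	*consumedp = GPAGE_SKETCH;		/* normal case: 1-page guard */
	return GPAGE_SKETCH;
}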
 5206 
 5207 /*
 5208  * MQuery style of allocation.
 5209  *
 5210  * This allocator searches forward until sufficient space is found to map
 5211  * the given size.
 5212  *
 5213  * XXX: factor in offset (via pmap_prefer) and protection?
 5214  */
 5215 int
 5216 uvm_map_mquery(struct vm_map *map, vaddr_t *addr_p, vsize_t sz, voff_t offset,
 5217     int flags)
 5218 {
 5219         struct vm_map_entry *entry, *last;
 5220         vaddr_t addr;
 5221         vaddr_t tmp, pmap_align, pmap_offset;
 5222         int error;
 5223 
 5224         addr = *addr_p;
 5225         vm_map_lock_read(map);
 5226 
 5227         /* Configure pmap prefer. */
 5228         if (offset != UVM_UNKNOWN_OFFSET) {
 5229                 pmap_align = MAX(PAGE_SIZE, PMAP_PREFER_ALIGN());
 5230                 pmap_offset = PMAP_PREFER_OFFSET(offset);
 5231         } else {
 5232                 pmap_align = PAGE_SIZE;
 5233                 pmap_offset = 0;
 5234         }
 5235 
 5236         /* Align address to pmap_prefer unless FLAG_FIXED is set. */
 5237         if (!(flags & UVM_FLAG_FIXED) && offset != UVM_UNKNOWN_OFFSET) {
 5238                 tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
 5239                 if (tmp < addr)
 5240                         tmp += pmap_align;
 5241                 addr = tmp;
 5242         }
 5243 
 5244         /* First, check if the requested range is fully available. */
 5245         entry = uvm_map_entrybyaddr(&map->addr, addr);
 5246         last = NULL;
 5247         if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
 5248                 error = 0;
 5249                 goto out;
 5250         }
 5251         if (flags & UVM_FLAG_FIXED) {
 5252                 error = EINVAL;
 5253                 goto out;
 5254         }
 5255 
 5256         error = ENOMEM; /* Default error from here. */
 5257 
 5258         /*
 5259          * At this point, the memory at <addr, sz> is not available.
 5260          * The reasons are:
 5261          * [1] it's outside the map,
 5262          * [2] it starts in used memory (and therefore needs to move
 5263          *     toward the first free page in entry),
 5264          * [3] it starts in free memory but bumps into used memory.
 5265          *
 5266          * Note that for case [2], the forward moving is handled by the
 5267          * for loop below.
 5268          */
 5269         if (entry == NULL) {
 5270                 /* [1] Outside the map. */
 5271                 if (addr >= map->max_offset)
 5272                         goto out;
 5273                 else
 5274                         entry = RBT_MIN(uvm_map_addr, &map->addr);
 5275         } else if (VMMAP_FREE_START(entry) <= addr) {
 5276                 /* [3] Bumped into used memory. */
 5277                 entry = RBT_NEXT(uvm_map_addr, entry);
 5278         }
 5279 
 5280         /* Test if the next entry is sufficient for the allocation. */
 5281         for (; entry != NULL;
 5282             entry = RBT_NEXT(uvm_map_addr, entry)) {
 5283                 if (entry->fspace == 0)
 5284                         continue;
 5285                 addr = VMMAP_FREE_START(entry);
 5286 
 5287 restart:        /* Restart address checks on address change. */
 5288                 tmp = (addr & ~(pmap_align - 1)) | pmap_offset;
 5289                 if (tmp < addr)
 5290                         tmp += pmap_align;
 5291                 addr = tmp;
 5292                 if (addr >= VMMAP_FREE_END(entry))
 5293                         continue;
 5294 
 5295                 /* Skip brk() allocation addresses. */
 5296                 if (addr + sz > map->b_start && addr < map->b_end) {
 5297                         if (VMMAP_FREE_END(entry) > map->b_end) {
 5298                                 addr = map->b_end;
 5299                                 goto restart;
 5300                         } else
 5301                                 continue;
 5302                 }
 5303                 /* Skip stack allocation addresses. */
 5304                 if (addr + sz > map->s_start && addr < map->s_end) {
 5305                         if (VMMAP_FREE_END(entry) > map->s_end) {
 5306                                 addr = map->s_end;
 5307                                 goto restart;
 5308                         } else
 5309                                 continue;
 5310                 }
 5311 
 5312                 last = NULL;
 5313                 if (uvm_map_isavail(map, NULL, &entry, &last, addr, sz)) {
 5314                         error = 0;
 5315                         goto out;
 5316                 }
 5317         }
 5318 
 5319 out:
 5320         vm_map_unlock_read(map);
 5321         if (error == 0)
 5322                 *addr_p = addr;
 5323         return error;
 5324 }
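
/*
 * Editor's sketch (not part of uvm_map.c): the round-up-to-colour step
 * used twice in uvm_map_mquery() above.  The low bits of the address are
 * forced to the preferred cache-colour offset, then the address is bumped
 * by one alignment unit if that moved it backwards.  "align" must be a
 * power of two and "offset" must be smaller than it; names are invented.
 */
#include <assert.h>
#include <stdint.h>

static uintptr_t
align_to_colour_sketch(uintptr_t addr, uintptr_t align, uintptr_t offset)
{
	uintptr_t tmp = (addr & ~(align - 1)) | offset;

	if (tmp < addr)
		tmp += align;		/* never hand back something below addr */
	return tmp;
}

static void __attribute__((__unused__))
align_to_colour_demo_sketch(void)
{
	/* 0x12345 pushed up to the 0x1000 colour within 64 KB alignment */
	assert(align_to_colour_sketch(0x12345, 0x10000, 0x1000) == 0x21000);
	/* already on the right colour: unchanged */
	assert(align_to_colour_sketch(0x21000, 0x10000, 0x1000) == 0x21000);
}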
 5325 
 5326 boolean_t
 5327 vm_map_lock_try_ln(struct vm_map *map, char *file, int line)
 5328 {
 5329         boolean_t rv;
 5330 
 5331         if (map->flags & VM_MAP_INTRSAFE) {
 5332                 rv = mtx_enter_try(&map->mtx);
 5333         } else {
 5334                 mtx_enter(&map->flags_lock);
 5335                 if (map->flags & VM_MAP_BUSY) {
 5336                         mtx_leave(&map->flags_lock);
 5337                         return (FALSE);
 5338                 }
 5339                 mtx_leave(&map->flags_lock);
 5340                 rv = (rw_enter(&map->lock, RW_WRITE|RW_NOSLEEP) == 0);
 5341                 /* check if the lock is busy and back out if we won the race */
 5342                 if (rv) {
 5343                         mtx_enter(&map->flags_lock);
 5344                         if (map->flags & VM_MAP_BUSY) {
 5345                                 rw_exit(&map->lock);
 5346                                 rv = FALSE;
 5347                         }
 5348                         mtx_leave(&map->flags_lock);
 5349                 }
 5350         }
 5351 
 5352         if (rv) {
 5353                 map->timestamp++;
 5354                 LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
 5355                 uvm_tree_sanity(map, file, line);
 5356                 uvm_tree_size_chk(map, file, line);
 5357         }
 5358 
 5359         return (rv);
 5360 }
 5361 
 5362 void
 5363 vm_map_lock_ln(struct vm_map *map, char *file, int line)
 5364 {
 5365         if ((map->flags & VM_MAP_INTRSAFE) == 0) {
 5366                 do {
 5367                         mtx_enter(&map->flags_lock);
 5368 tryagain:
 5369                         while (map->flags & VM_MAP_BUSY) {
 5370                                 map->flags |= VM_MAP_WANTLOCK;
 5371                                 msleep_nsec(&map->flags, &map->flags_lock,
 5372                                     PVM, vmmapbsy, INFSLP);
 5373                         }
 5374                         mtx_leave(&map->flags_lock);
 5375                 } while (rw_enter(&map->lock, RW_WRITE|RW_SLEEPFAIL) != 0);
 5376                 /* check if the lock is busy and back out if we won the race */
 5377                 mtx_enter(&map->flags_lock);
 5378                 if (map->flags & VM_MAP_BUSY) {
 5379                         rw_exit(&map->lock);
 5380                         goto tryagain;
 5381                 }
 5382                 mtx_leave(&map->flags_lock);
 5383         } else {
 5384                 mtx_enter(&map->mtx);
 5385         }
 5386 
 5387         map->timestamp++;
 5388         LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
 5389         uvm_tree_sanity(map, file, line);
 5390         uvm_tree_size_chk(map, file, line);
 5391 }
 5392 
 5393 void
 5394 vm_map_lock_read_ln(struct vm_map *map, char *file, int line)
 5395 {
 5396         if ((map->flags & VM_MAP_INTRSAFE) == 0)
 5397                 rw_enter_read(&map->lock);
 5398         else
 5399                 mtx_enter(&map->mtx);
 5400         LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
 5401         uvm_tree_sanity(map, file, line);
 5402         uvm_tree_size_chk(map, file, line);
 5403 }
 5404 
 5405 void
 5406 vm_map_unlock_ln(struct vm_map *map, char *file, int line)
 5407 {
 5408         uvm_tree_sanity(map, file, line);
 5409         uvm_tree_size_chk(map, file, line);
 5410         LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
 5411         if ((map->flags & VM_MAP_INTRSAFE) == 0)
 5412                 rw_exit(&map->lock);
 5413         else
 5414                 mtx_leave(&map->mtx);
 5415 }
 5416 
 5417 void
 5418 vm_map_unlock_read_ln(struct vm_map *map, char *file, int line)
 5419 {
 5420         /* XXX: RO */ uvm_tree_sanity(map, file, line);
 5421         /* XXX: RO */ uvm_tree_size_chk(map, file, line);
 5422         LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
 5423         if ((map->flags & VM_MAP_INTRSAFE) == 0)
 5424                 rw_exit_read(&map->lock);
 5425         else
 5426                 mtx_leave(&map->mtx);
 5427 }
 5428 
 5429 void
 5430 vm_map_downgrade_ln(struct vm_map *map, char *file, int line)
 5431 {
 5432         uvm_tree_sanity(map, file, line);
 5433         uvm_tree_size_chk(map, file, line);
 5434         LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
 5435         LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
 5436         KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
 5437         if ((map->flags & VM_MAP_INTRSAFE) == 0)
 5438                 rw_enter(&map->lock, RW_DOWNGRADE);
 5439 }
 5440 
 5441 void
 5442 vm_map_upgrade_ln(struct vm_map *map, char *file, int line)
 5443 {
 5444         /* XXX: RO */ uvm_tree_sanity(map, file, line);
 5445         /* XXX: RO */ uvm_tree_size_chk(map, file, line);
 5446         LPRINTF(("map unlock: %p (at %s %d)\n", map, file, line));
 5447         KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
 5448         if ((map->flags & VM_MAP_INTRSAFE) == 0) {
 5449                 rw_exit_read(&map->lock);
 5450                 rw_enter_write(&map->lock);
 5451         }
 5452         LPRINTF(("map   lock: %p (at %s %d)\n", map, file, line));
 5453         uvm_tree_sanity(map, file, line);
 5454 }
 5455 
 5456 void
 5457 vm_map_busy_ln(struct vm_map *map, char *file, int line)
 5458 {
 5459         KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
 5460         mtx_enter(&map->flags_lock);
 5461         map->flags |= VM_MAP_BUSY;
 5462         mtx_leave(&map->flags_lock);
 5463 }
 5464 
 5465 void
 5466 vm_map_unbusy_ln(struct vm_map *map, char *file, int line)
 5467 {
 5468         int oflags;
 5469 
 5470         KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
 5471         mtx_enter(&map->flags_lock);
 5472         oflags = map->flags;
 5473         map->flags &= ~(VM_MAP_BUSY|VM_MAP_WANTLOCK);
 5474         mtx_leave(&map->flags_lock);
 5475         if (oflags & VM_MAP_WANTLOCK)
 5476                 wakeup(&map->flags);
 5477 }
 5478 
 5479 void
 5480 vm_map_assert_anylock_ln(struct vm_map *map, char *file, int line)
 5481 {
 5482         LPRINTF(("map assert read or write locked: %p (at %s %d)\n", map, file, line));
 5483         if ((map->flags & VM_MAP_INTRSAFE) == 0)
 5484                 rw_assert_anylock(&map->lock);
 5485         else
 5486                 MUTEX_ASSERT_LOCKED(&map->mtx);
 5487 }
 5488 
 5489 void
 5490 vm_map_assert_wrlock_ln(struct vm_map *map, char *file, int line)
 5491 {
 5492         LPRINTF(("map assert write locked: %p (at %s %d)\n", map, file, line));
 5493         if ((map->flags & VM_MAP_INTRSAFE) == 0) {
 5494                 splassert(IPL_NONE);
 5495                 rw_assert_wrlock(&map->lock);
 5496         } else
 5497                 MUTEX_ASSERT_LOCKED(&map->mtx);
 5498 }
 5499 
 5500 #ifndef SMALL_KERNEL
 5501 int
 5502 uvm_map_fill_vmmap(struct vm_map *map, struct kinfo_vmentry *kve,
 5503     size_t *lenp)
 5504 {
 5505         struct vm_map_entry *entry;
 5506         vaddr_t start;
 5507         int cnt, maxcnt, error = 0;
 5508 
 5509         KASSERT(*lenp > 0);
 5510         KASSERT((*lenp % sizeof(*kve)) == 0);
 5511         cnt = 0;
 5512         maxcnt = *lenp / sizeof(*kve);
 5513         KASSERT(maxcnt > 0);
 5514 
 5515         /*
 5516          * Return only entries whose address is above the given base
 5517          * address.  This allows userland to iterate without knowing the
 5518          * number of entries beforehand.
 5519          */
 5520         start = (vaddr_t)kve[0].kve_start;
 5521 
 5522         vm_map_lock(map);
 5523         RBT_FOREACH(entry, uvm_map_addr, &map->addr) {
 5524                 if (cnt == maxcnt) {
 5525                         error = ENOMEM;
 5526                         break;
 5527                 }
 5528                 if (start != 0 && entry->start < start)
 5529                         continue;
 5530                 kve->kve_start = entry->start;
 5531                 kve->kve_end = entry->end;
 5532                 kve->kve_guard = entry->guard;
 5533                 kve->kve_fspace = entry->fspace;
 5534                 kve->kve_fspace_augment = entry->fspace_augment;
 5535                 kve->kve_offset = entry->offset;
 5536                 kve->kve_wired_count = entry->wired_count;
 5537                 kve->kve_etype = entry->etype;
 5538                 kve->kve_protection = entry->protection;
 5539                 kve->kve_max_protection = entry->max_protection;
 5540                 kve->kve_advice = entry->advice;
 5541                 kve->kve_inheritance = entry->inheritance;
 5542                 kve->kve_flags = entry->flags;
 5543                 kve++;
 5544                 cnt++;
 5545         }
 5546         vm_map_unlock(map);
 5547 
 5548         KASSERT(cnt <= maxcnt);
 5549 
 5550         *lenp = sizeof(*kve) * cnt;
 5551         return error;
 5552 }
 5553 #endif
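
/*
 * Editor's sketch (not part of uvm_map.c): the resume-from-base pattern
 * used by uvm_map_fill_vmmap() above.  Userland seeds the first output
 * slot with the last start address it saw, the kernel skips entries below
 * that base, and a full buffer is reported as ENOMEM so the caller can
 * retry from where it left off.  Types and names are invented for the
 * example.
 */
#include <errno.h>
#include <stddef.h>
#include <stdint.h>

struct kve_sketch {
	uintptr_t start, end;
};

static int
fill_entries_sketch(const struct kve_sketch *src, size_t nsrc,
    struct kve_sketch *out, size_t *noutp)
{
	size_t i, cnt = 0, maxcnt = *noutp;
	uintptr_t base = out[0].start;		/* resume point from caller */
	int error = 0;

	for (i = 0; i < nsrc; i++) {
		if (cnt == maxcnt) {
			error = ENOMEM;		/* caller retries from last start */
			break;
		}
		if (base != 0 && src[i].start < base)
			continue;		/* already delivered earlier */
		out[cnt++] = src[i];
	}
	*noutp = cnt;				/* number of slots actually filled */
	return error;
}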
 5554 
 5555 
 5556 RBT_GENERATE_AUGMENT(uvm_map_addr, vm_map_entry, daddrs.addr_entry,
 5557     uvm_mapentry_addrcmp, uvm_map_addr_augment);
 5558 
 5559 
 5560 /*
 5561  * MD code: vmspace allocator setup.
 5562  */
 5563 
 5564 #ifdef __i386__
 5565 void
 5566 uvm_map_setup_md(struct vm_map *map)
 5567 {
 5568         vaddr_t         min, max;
 5569 
 5570         min = map->min_offset;
 5571         max = map->max_offset;
 5572 
 5573         /*
 5574          * Ensure the selectors will not try to manage page 0;
 5575          * it's too special.
 5576          */
 5577         if (min < VMMAP_MIN_ADDR)
 5578                 min = VMMAP_MIN_ADDR;
 5579 
 5580 #if 0   /* Cool stuff, not yet */
 5581         /* Executable code is special. */
 5582         map->uaddr_exe = uaddr_rnd_create(min, I386_MAX_EXE_ADDR);
 5583         /* Place normal allocations beyond executable mappings. */
 5584         map->uaddr_any[3] = uaddr_pivot_create(2 * I386_MAX_EXE_ADDR, max);
 5585 #else   /* Crappy stuff, for now */
 5586         map->uaddr_any[0] = uaddr_rnd_create(min, max);
 5587 #endif
 5588 
 5589 #ifndef SMALL_KERNEL
 5590         map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
 5591 #endif /* !SMALL_KERNEL */
 5592 }
 5593 #elif __LP64__
 5594 void
 5595 uvm_map_setup_md(struct vm_map *map)
 5596 {
 5597         vaddr_t         min, max;
 5598 
 5599         min = map->min_offset;
 5600         max = map->max_offset;
 5601 
 5602         /*
 5603          * Ensure the selectors will not try to manage page 0;
 5604          * it's too special.
 5605          */
 5606         if (min < VMMAP_MIN_ADDR)
 5607                 min = VMMAP_MIN_ADDR;
 5608 
 5609 #if 0   /* Cool stuff, not yet */
 5610         map->uaddr_any[3] = uaddr_pivot_create(MAX(min, 0x100000000ULL), max);
 5611 #else   /* Crappy stuff, for now */
 5612         map->uaddr_any[0] = uaddr_rnd_create(min, max);
 5613 #endif
 5614 
 5615 #ifndef SMALL_KERNEL
 5616         map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
 5617 #endif /* !SMALL_KERNEL */
 5618 }
 5619 #else   /* non-i386, 32 bit */
 5620 void
 5621 uvm_map_setup_md(struct vm_map *map)
 5622 {
 5623         vaddr_t         min, max;
 5624 
 5625         min = map->min_offset;
 5626         max = map->max_offset;
 5627 
 5628         /*
 5629          * Ensure the selectors will not try to manage page 0;
 5630          * it's too special.
 5631          */
 5632         if (min < VMMAP_MIN_ADDR)
 5633                 min = VMMAP_MIN_ADDR;
 5634 
 5635 #if 0   /* Cool stuff, not yet */
 5636         map->uaddr_any[3] = uaddr_pivot_create(min, max);
 5637 #else   /* Crappy stuff, for now */
 5638         map->uaddr_any[0] = uaddr_rnd_create(min, max);
 5639 #endif
 5640 
 5641 #ifndef SMALL_KERNEL
 5642         map->uaddr_brk_stack = uaddr_stack_brk_create(min, max);
 5643 #endif /* !SMALL_KERNEL */
 5644 }
 5645 #endif
