The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_map.c

Version: -  FREEBSD  -  FREEBSD11  -  FREEBSD10  -  FREEBSD9  -  FREEBSD92  -  FREEBSD91  -  FREEBSD90  -  FREEBSD8  -  FREEBSD82  -  FREEBSD81  -  FREEBSD80  -  FREEBSD7  -  FREEBSD74  -  FREEBSD73  -  FREEBSD72  -  FREEBSD71  -  FREEBSD70  -  FREEBSD6  -  FREEBSD64  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: uvm_map.c,v 1.186.2.2 2005/05/01 11:05:06 tron Exp $   */
    2 
    3 /*
    4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
    5  * Copyright (c) 1991, 1993, The Regents of the University of California.
    6  *
    7  * All rights reserved.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * The Mach Operating System project at Carnegie-Mellon University.
   11  *
   12  * Redistribution and use in source and binary forms, with or without
   13  * modification, are permitted provided that the following conditions
   14  * are met:
   15  * 1. Redistributions of source code must retain the above copyright
   16  *    notice, this list of conditions and the following disclaimer.
   17  * 2. Redistributions in binary form must reproduce the above copyright
   18  *    notice, this list of conditions and the following disclaimer in the
   19  *    documentation and/or other materials provided with the distribution.
   20  * 3. All advertising materials mentioning features or use of this software
   21  *    must display the following acknowledgement:
   22  *      This product includes software developed by Charles D. Cranor,
   23  *      Washington University, the University of California, Berkeley and
   24  *      its contributors.
   25  * 4. Neither the name of the University nor the names of its contributors
   26  *    may be used to endorse or promote products derived from this software
   27  *    without specific prior written permission.
   28  *
   29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   39  * SUCH DAMAGE.
   40  *
   41  *      @(#)vm_map.c    8.3 (Berkeley) 1/12/94
   42  * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
   43  *
   44  *
   45  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   46  * All rights reserved.
   47  *
   48  * Permission to use, copy, modify and distribute this software and
   49  * its documentation is hereby granted, provided that both the copyright
   50  * notice and this permission notice appear in all copies of the
   51  * software, derivative works or modified versions, and any portions
   52  * thereof, and that both notices appear in supporting documentation.
   53  *
   54  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   55  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   56  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   57  *
   58  * Carnegie Mellon requests users of this software to return to
   59  *
   60  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   61  *  School of Computer Science
   62  *  Carnegie Mellon University
   63  *  Pittsburgh PA 15213-3890
   64  *
   65  * any improvements or extensions that they make and grant Carnegie the
   66  * rights to redistribute these changes.
   67  */
   68 
   69 /*
   70  * uvm_map.c: uvm map operations
   71  */
   72 
   73 #include <sys/cdefs.h>
   74 __KERNEL_RCSID(0, "$NetBSD: uvm_map.c,v 1.186.2.2 2005/05/01 11:05:06 tron Exp $");
   75 
   76 #include "opt_ddb.h"
   77 #include "opt_uvmhist.h"
   78 #include "opt_uvm.h"
   79 #include "opt_sysv.h"
   80 
   81 #include <sys/param.h>
   82 #include <sys/systm.h>
   83 #include <sys/mman.h>
   84 #include <sys/proc.h>
   85 #include <sys/malloc.h>
   86 #include <sys/pool.h>
   87 #include <sys/kernel.h>
   88 #include <sys/mount.h>
   89 #include <sys/vnode.h>
   90 
   91 #ifdef SYSVSHM
   92 #include <sys/shm.h>
   93 #endif
   94 
   95 #define UVM_MAP
   96 #include <uvm/uvm.h>
   97 #undef RB_AUGMENT
   98 #define RB_AUGMENT(x)   uvm_rb_augment(x)
   99 
  100 #ifdef DDB
  101 #include <uvm/uvm_ddb.h>
  102 #endif
  103 
  104 #ifndef UVMMAP_NOCOUNTERS
  105 #include <sys/device.h>
  106 struct evcnt map_ubackmerge = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
  107     "uvmmap", "ubackmerge");
  108 struct evcnt map_uforwmerge = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
  109     "uvmmap", "uforwmerge");
  110 struct evcnt map_ubimerge = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
  111     "uvmmap", "ubimerge");
  112 struct evcnt map_unomerge = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
  113     "uvmmap", "unomerge");
  114 struct evcnt map_kbackmerge = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
  115     "uvmmap", "kbackmerge");
  116 struct evcnt map_kforwmerge = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
  117     "uvmmap", "kforwmerge");
  118 struct evcnt map_kbimerge = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
  119     "uvmmap", "kbimerge");
  120 struct evcnt map_knomerge = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
  121     "uvmmap", "knomerge");
  122 struct evcnt uvm_map_call = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
  123     "uvmmap", "map_call");
  124 struct evcnt uvm_mlk_call = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
  125     "uvmmap", "mlk_call");
  126 struct evcnt uvm_mlk_hint = EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL,
  127     "uvmmap", "mlk_hint");
  128 
  129 EVCNT_ATTACH_STATIC(map_ubackmerge);
  130 EVCNT_ATTACH_STATIC(map_uforwmerge);
  131 EVCNT_ATTACH_STATIC(map_ubimerge);
  132 EVCNT_ATTACH_STATIC(map_unomerge);
  133 EVCNT_ATTACH_STATIC(map_kbackmerge);
  134 EVCNT_ATTACH_STATIC(map_kforwmerge);
  135 EVCNT_ATTACH_STATIC(map_kbimerge);
  136 EVCNT_ATTACH_STATIC(map_knomerge);
  137 EVCNT_ATTACH_STATIC(uvm_map_call);
  138 EVCNT_ATTACH_STATIC(uvm_mlk_call);
  139 EVCNT_ATTACH_STATIC(uvm_mlk_hint);
  140 
  141 #define UVMCNT_INCR(ev)         ev.ev_count++
  142 #define UVMCNT_DECR(ev)         ev.ev_count--
  143 #else
  144 #define UVMCNT_INCR(ev)
  145 #define UVMCNT_DECR(ev)
  146 #endif
  147 
  148 const char vmmapbsy[] = "vmmapbsy";
  149 
  150 /*
  151  * pool for vmspace structures.
  152  */
  153 
  154 POOL_INIT(uvm_vmspace_pool, sizeof(struct vmspace), 0, 0, 0, "vmsppl",
  155     &pool_allocator_nointr);
  156 
  157 /*
  158  * pool for dynamically-allocated map entries.
  159  */
  160 
  161 POOL_INIT(uvm_map_entry_pool, sizeof(struct vm_map_entry), 0, 0, 0, "vmmpepl",
  162     &pool_allocator_nointr);
  163 
  164 MALLOC_DEFINE(M_VMMAP, "VM map", "VM map structures");
  165 MALLOC_DEFINE(M_VMPMAP, "VM pmap", "VM pmap");
  166 
  167 #ifdef PMAP_GROWKERNEL
  168 /*
  169  * This global represents the end of the kernel virtual address
  170  * space.  If we want to exceed this, we must grow the kernel
  171  * virtual address space dynamically.
  172  *
  173  * Note, this variable is locked by kernel_map's lock.
  174  */
  175 vaddr_t uvm_maxkaddr;
  176 #endif
  177 
  178 /*
  179  * macros
  180  */
  181 
  182 /*
  183  * VM_MAP_USE_KMAPENT: determine if uvm_kmapent_alloc/free is used
  184  * for the vm_map.
  185  */
  186 extern struct vm_map *pager_map; /* XXX */
  187 #define VM_MAP_USE_KMAPENT(map) \
  188         (((map)->flags & VM_MAP_INTRSAFE) || (map) == kernel_map)
  189 
  190 /*
  191  * uvm_map_entry_link: insert entry into a map
  192  *
  193  * => map must be locked
  194  */
  195 #define uvm_map_entry_link(map, after_where, entry) do { \
  196         KASSERT(entry->start < entry->end); \
  197         (map)->nentries++; \
  198         (entry)->prev = (after_where); \
  199         (entry)->next = (after_where)->next; \
  200         (entry)->prev->next = (entry); \
  201         (entry)->next->prev = (entry); \
  202         uvm_rb_insert((map), (entry)); \
  203 } while (/*CONSTCOND*/ 0)
  204 
  205 /*
  206  * uvm_map_entry_unlink: remove entry from a map
  207  *
  208  * => map must be locked
  209  */
  210 #define uvm_map_entry_unlink(map, entry) do { \
  211         (map)->nentries--; \
  212         (entry)->next->prev = (entry)->prev; \
  213         (entry)->prev->next = (entry)->next; \
  214         uvm_rb_remove((map), (entry)); \
  215 } while (/*CONSTCOND*/ 0)
  216 
  217 /*
  218  * SAVE_HINT: saves the specified entry as the hint for future lookups.
  219  *
  220  * => map need not be locked (protected by hint_lock).
  221  */
  222 #define SAVE_HINT(map,check,value) do { \
  223         simple_lock(&(map)->hint_lock); \
  224         if ((map)->hint == (check)) \
  225                 (map)->hint = (value); \
  226         simple_unlock(&(map)->hint_lock); \
  227 } while (/*CONSTCOND*/ 0)
  228 
  229 /*
  230  * VM_MAP_RANGE_CHECK: check and correct range
  231  *
  232  * => map must at least be read locked
  233  */
  234 
  235 #define VM_MAP_RANGE_CHECK(map, start, end) do { \
  236         if (start < vm_map_min(map))            \
  237                 start = vm_map_min(map);        \
  238         if (end > vm_map_max(map))              \
  239                 end = vm_map_max(map);          \
  240         if (start > end)                        \
  241                 start = end;                    \
  242 } while (/*CONSTCOND*/ 0)
  243 
  244 /*
  245  * local prototypes
  246  */
  247 
  248 static struct vm_map_entry *
  249                 uvm_mapent_alloc(struct vm_map *, int);
  250 static struct vm_map_entry *
  251                 uvm_mapent_alloc_split(struct vm_map *,
  252                     const struct vm_map_entry *, int,
  253                     struct uvm_mapent_reservation *);
  254 static void     uvm_mapent_copy(struct vm_map_entry *, struct vm_map_entry *);
  255 static void     uvm_mapent_free(struct vm_map_entry *);
  256 static struct vm_map_entry *
  257                 uvm_kmapent_alloc(struct vm_map *, int);
  258 static void     uvm_kmapent_free(struct vm_map_entry *);
  259 static void     uvm_map_entry_unwire(struct vm_map *, struct vm_map_entry *);
  260 static void     uvm_map_reference_amap(struct vm_map_entry *, int);
  261 static int      uvm_map_space_avail(vaddr_t *, vsize_t, voff_t, vsize_t, int,
  262                     struct vm_map_entry *);
  263 static void     uvm_map_unreference_amap(struct vm_map_entry *, int);
  264 
  265 int _uvm_tree_sanity(struct vm_map *, const char *);
  266 static vsize_t uvm_rb_subtree_space(const struct vm_map_entry *);
  267 
  268 static __inline int
  269 uvm_compare(const struct vm_map_entry *a, const struct vm_map_entry *b)
  270 {
  271 
  272         if (a->start < b->start)
  273                 return (-1);
  274         else if (a->start > b->start)
  275                 return (1);
  276 
  277         return (0);
  278 }
  279 
  280 static __inline void
  281 uvm_rb_augment(struct vm_map_entry *entry)
  282 {
  283 
  284         entry->space = uvm_rb_subtree_space(entry);
  285 }
  286 
  287 RB_PROTOTYPE(uvm_tree, vm_map_entry, rb_entry, uvm_compare);
  288 
  289 RB_GENERATE(uvm_tree, vm_map_entry, rb_entry, uvm_compare);
  290 
  291 static __inline vsize_t
  292 uvm_rb_space(const struct vm_map *map, const struct vm_map_entry *entry)
  293 {
  294         /* XXX map is not used */
  295 
  296         KASSERT(entry->next != NULL);
  297         return entry->next->start - entry->end;
  298 }
  299 
  300 static vsize_t
  301 uvm_rb_subtree_space(const struct vm_map_entry *entry)
  302 {
  303         vaddr_t space, tmp;
  304 
  305         space = entry->ownspace;
  306         if (RB_LEFT(entry, rb_entry)) {
  307                 tmp = RB_LEFT(entry, rb_entry)->space;
  308                 if (tmp > space)
  309                         space = tmp;
  310         }
  311 
  312         if (RB_RIGHT(entry, rb_entry)) {
  313                 tmp = RB_RIGHT(entry, rb_entry)->space;
  314                 if (tmp > space)
  315                         space = tmp;
  316         }
  317 
  318         return (space);
  319 }
  320 
  321 static __inline void
  322 uvm_rb_fixup(struct vm_map *map, struct vm_map_entry *entry)
  323 {
  324         /* We need to traverse to the very top */
  325         do {
  326                 entry->ownspace = uvm_rb_space(map, entry);
  327                 entry->space = uvm_rb_subtree_space(entry);
  328         } while ((entry = RB_PARENT(entry, rb_entry)) != NULL);
  329 }
  330 
  331 static __inline void
  332 uvm_rb_insert(struct vm_map *map, struct vm_map_entry *entry)
  333 {
  334         vaddr_t space = uvm_rb_space(map, entry);
  335         struct vm_map_entry *tmp;
  336 
  337         entry->ownspace = entry->space = space;
  338         tmp = RB_INSERT(uvm_tree, &(map)->rbhead, entry);
  339 #ifdef DIAGNOSTIC
  340         if (tmp != NULL)
  341                 panic("uvm_rb_insert: duplicate entry?");
  342 #endif
  343         uvm_rb_fixup(map, entry);
  344         if (entry->prev != &map->header)
  345                 uvm_rb_fixup(map, entry->prev);
  346 }
  347 
  348 static __inline void
  349 uvm_rb_remove(struct vm_map *map, struct vm_map_entry *entry)
  350 {
  351         struct vm_map_entry *parent;
  352 
  353         parent = RB_PARENT(entry, rb_entry);
  354         RB_REMOVE(uvm_tree, &(map)->rbhead, entry);
  355         if (entry->prev != &map->header)
  356                 uvm_rb_fixup(map, entry->prev);
  357         if (parent)
  358                 uvm_rb_fixup(map, parent);
  359 }
  360 
  361 #ifdef DEBUG
  362 int uvm_debug_check_rbtree = 0;
  363 #define uvm_tree_sanity(x,y)            \
  364         if (uvm_debug_check_rbtree)     \
  365                 _uvm_tree_sanity(x,y)
  366 #else
  367 #define uvm_tree_sanity(x,y)
  368 #endif
  369 
  370 int
  371 _uvm_tree_sanity(struct vm_map *map, const char *name)
  372 {
  373         struct vm_map_entry *tmp, *trtmp;
  374         int n = 0, i = 1;
  375 
  376         RB_FOREACH(tmp, uvm_tree, &map->rbhead) {
  377                 if (tmp->ownspace != uvm_rb_space(map, tmp)) {
  378                         printf("%s: %d/%d ownspace %lx != %lx %s\n",
  379                             name, n + 1, map->nentries,
  380                             (ulong)tmp->ownspace, (ulong)uvm_rb_space(map, tmp),
  381                             tmp->next == &map->header ? "(last)" : "");
  382                         goto error;
  383                 }
  384         }
  385         trtmp = NULL;
  386         RB_FOREACH(tmp, uvm_tree, &map->rbhead) {
  387                 if (tmp->space != uvm_rb_subtree_space(tmp)) {
  388                         printf("%s: space %lx != %lx\n",
  389                             name, (ulong)tmp->space,
  390                             (ulong)uvm_rb_subtree_space(tmp));
  391                         goto error;
  392                 }
  393                 if (trtmp != NULL && trtmp->start >= tmp->start) {
  394                         printf("%s: corrupt: 0x%lx >= 0x%lx\n",
  395                             name, trtmp->start, tmp->start);
  396                         goto error;
  397                 }
  398                 n++;
  399 
  400                 trtmp = tmp;
  401         }
  402 
  403         if (n != map->nentries) {
  404                 printf("%s: nentries: %d vs %d\n",
  405                     name, n, map->nentries);
  406                 goto error;
  407         }
  408 
  409         for (tmp = map->header.next; tmp && tmp != &map->header;
  410             tmp = tmp->next, i++) {
  411                 trtmp = RB_FIND(uvm_tree, &map->rbhead, tmp);
  412                 if (trtmp != tmp) {
  413                         printf("%s: lookup: %d: %p - %p: %p\n",
  414                             name, i, tmp, trtmp,
  415                             RB_PARENT(tmp, rb_entry));
  416                         goto error;
  417                 }
  418         }
  419 
  420         return (0);
  421  error:
  422 #ifdef  DDB
  423         /* handy breakpoint location for error case */
  424         __asm(".globl treesanity_label\ntreesanity_label:");
  425 #endif
  426         return (-1);
  427 }
  428 
  429 /*
  430  * local inlines
  431  */
  432 
  433 static __inline struct vm_map *uvm_kmapent_map(struct vm_map_entry *);
  434 
  435 /*
  436  * uvm_mapent_alloc: allocate a map entry
  437  */
  438 
  439 static __inline struct vm_map_entry *
  440 uvm_mapent_alloc(struct vm_map *map, int flags)
  441 {
  442         struct vm_map_entry *me;
  443         int pflags = (flags & UVM_FLAG_NOWAIT) ? PR_NOWAIT : PR_WAITOK;
  444         UVMHIST_FUNC("uvm_mapent_alloc"); UVMHIST_CALLED(maphist);
  445 
  446         if (VM_MAP_USE_KMAPENT(map)) {
  447                 me = uvm_kmapent_alloc(map, flags);
  448         } else {
  449                 me = pool_get(&uvm_map_entry_pool, pflags);
  450                 if (__predict_false(me == NULL))
  451                         return NULL;
  452                 me->flags = 0;
  453         }
  454 
  455         UVMHIST_LOG(maphist, "<- new entry=0x%x [kentry=%d]", me,
  456             ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map), 0, 0);
  457         return (me);
  458 }
  459 
  460 /*
  461  * uvm_mapent_alloc_split: allocate a map entry for clipping.
  462  */
  463 
  464 static __inline struct vm_map_entry *
  465 uvm_mapent_alloc_split(struct vm_map *map,
  466     const struct vm_map_entry *old_entry, int flags,
  467     struct uvm_mapent_reservation *umr)
  468 {
  469         struct vm_map_entry *me;
  470 
  471         KASSERT(!VM_MAP_USE_KMAPENT(map) ||
  472             (old_entry->flags & UVM_MAP_QUANTUM) || !UMR_EMPTY(umr));
  473 
  474         if (old_entry->flags & UVM_MAP_QUANTUM) {
  475                 int s;
  476                 struct vm_map_kernel *vmk = vm_map_to_kernel(map);
  477 
  478                 s = splvm();
  479                 simple_lock(&uvm.kentry_lock);
  480                 me = vmk->vmk_merged_entries;
  481                 KASSERT(me);
  482                 vmk->vmk_merged_entries = me->next;
  483                 simple_unlock(&uvm.kentry_lock);
  484                 splx(s);
  485                 KASSERT(me->flags & UVM_MAP_QUANTUM);
  486         } else {
  487                 me = uvm_mapent_alloc(map, flags);
  488         }
  489 
  490         return me;
  491 }
  492 
  493 /*
  494  * uvm_mapent_free: free map entry
  495  */
  496 
  497 static __inline void
  498 uvm_mapent_free(struct vm_map_entry *me)
  499 {
  500         UVMHIST_FUNC("uvm_mapent_free"); UVMHIST_CALLED(maphist);
  501 
  502         UVMHIST_LOG(maphist,"<- freeing map entry=0x%x [flags=%d]",
  503                 me, me->flags, 0, 0);
  504         if (me->flags & UVM_MAP_KERNEL) {
  505                 uvm_kmapent_free(me);
  506         } else {
  507                 pool_put(&uvm_map_entry_pool, me);
  508         }
  509 }
  510 
  511 /*
  512  * uvm_mapent_free_merge: free merged map entry
  513  *
  514  * => keep the entry if needed.
  515  * => caller shouldn't hold map locked.
  516  */
  517 
  518 static __inline void
  519 uvm_mapent_free_merged(struct vm_map *map, struct vm_map_entry *me)
  520 {
  521 
  522         KASSERT(!(me->flags & UVM_MAP_KERNEL) || uvm_kmapent_map(me) == map);
  523 
  524         if (me->flags & UVM_MAP_QUANTUM) {
  525                 /*
  526                  * keep this entry for later splitting.
  527                  */
  528                 struct vm_map_kernel *vmk;
  529                 int s;
  530 
  531                 KASSERT(VM_MAP_IS_KERNEL(map));
  532                 KASSERT(!VM_MAP_USE_KMAPENT(map) ||
  533                     (me->flags & UVM_MAP_KERNEL));
  534 
  535                 vmk = vm_map_to_kernel(map);
  536                 s = splvm();
  537                 simple_lock(&uvm.kentry_lock);
  538                 me->next = vmk->vmk_merged_entries;
  539                 vmk->vmk_merged_entries = me;
  540                 simple_unlock(&uvm.kentry_lock);
  541                 splx(s);
  542         } else {
  543                 uvm_mapent_free(me);
  544         }
  545 }
  546 
  547 /*
  548  * uvm_mapent_copy: copy a map entry, preserving flags
  549  */
  550 
  551 static __inline void
  552 uvm_mapent_copy(struct vm_map_entry *src, struct vm_map_entry *dst)
  553 {
  554 
  555         memcpy(dst, src, ((char *)&src->uvm_map_entry_stop_copy) -
  556             ((char *)src));
  557 }
  558 
  559 /*
  560  * uvm_map_entry_unwire: unwire a map entry
  561  *
  562  * => map should be locked by caller
  563  */
  564 
  565 static __inline void
  566 uvm_map_entry_unwire(struct vm_map *map, struct vm_map_entry *entry)
  567 {
  568 
  569         entry->wired_count = 0;
  570         uvm_fault_unwire_locked(map, entry->start, entry->end);
  571 }
  572 
  573 
  574 /*
  575  * wrapper for calling amap_ref()
  576  */
  577 static __inline void
  578 uvm_map_reference_amap(struct vm_map_entry *entry, int flags)
  579 {
  580 
  581         amap_ref(entry->aref.ar_amap, entry->aref.ar_pageoff,
  582             (entry->end - entry->start) >> PAGE_SHIFT, flags);
  583 }
  584 
  585 
  586 /*
  587  * wrapper for calling amap_unref()
  588  */
  589 static __inline void
  590 uvm_map_unreference_amap(struct vm_map_entry *entry, int flags)
  591 {
  592 
  593         amap_unref(entry->aref.ar_amap, entry->aref.ar_pageoff,
  594             (entry->end - entry->start) >> PAGE_SHIFT, flags);
  595 }
  596 
  597 
  598 /*
  599  * uvm_map_init: init mapping system at boot time.   note that we allocate
  600  * and init the static pool of struct vm_map_entry *'s for the kernel here.
  601  */
  602 
  603 void
  604 uvm_map_init(void)
  605 {
  606 #if defined(UVMHIST)
  607         static struct uvm_history_ent maphistbuf[100];
  608         static struct uvm_history_ent pdhistbuf[100];
  609 #endif
  610 
  611         /*
  612          * first, init logging system.
  613          */
  614 
  615         UVMHIST_FUNC("uvm_map_init");
  616         UVMHIST_INIT_STATIC(maphist, maphistbuf);
  617         UVMHIST_INIT_STATIC(pdhist, pdhistbuf);
  618         UVMHIST_CALLED(maphist);
  619         UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0);
  620 
  621         /*
  622          * initialize the global lock for kernel map entry.
  623          *
  624          * XXX is it worth to have per-map lock instead?
  625          */
  626 
  627         simple_lock_init(&uvm.kentry_lock);
  628 }
  629 
  630 /*
  631  * clippers
  632  */
  633 
  634 /*
  635  * uvm_map_clip_start: ensure that the entry begins at or after
  636  *      the starting address, if it doesn't we split the entry.
  637  *
  638  * => caller should use UVM_MAP_CLIP_START macro rather than calling
  639  *    this directly
  640  * => map must be locked by caller
  641  */
  642 
  643 void
  644 uvm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry,
  645     vaddr_t start, struct uvm_mapent_reservation *umr)
  646 {
  647         struct vm_map_entry *new_entry;
  648         vaddr_t new_adj;
  649 
  650         /* uvm_map_simplify_entry(map, entry); */ /* XXX */
  651 
  652         uvm_tree_sanity(map, "clip_start entry");
  653 
  654         /*
  655          * Split off the front portion.  note that we must insert the new
  656          * entry BEFORE this one, so that this entry has the specified
  657          * starting address.
  658          */
  659         new_entry = uvm_mapent_alloc_split(map, entry, 0, umr);
  660         uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
  661 
  662         new_entry->end = start;
  663         new_adj = start - new_entry->start;
  664         if (entry->object.uvm_obj)
  665                 entry->offset += new_adj;       /* shift start over */
  666 
  667         /* Does not change order for the RB tree */
  668         entry->start = start;
  669 
  670         if (new_entry->aref.ar_amap) {
  671                 amap_splitref(&new_entry->aref, &entry->aref, new_adj);
  672         }
  673 
  674         uvm_map_entry_link(map, entry->prev, new_entry);
  675 
  676         if (UVM_ET_ISSUBMAP(entry)) {
  677                 /* ... unlikely to happen, but play it safe */
  678                  uvm_map_reference(new_entry->object.sub_map);
  679         } else {
  680                 if (UVM_ET_ISOBJ(entry) &&
  681                     entry->object.uvm_obj->pgops &&
  682                     entry->object.uvm_obj->pgops->pgo_reference)
  683                         entry->object.uvm_obj->pgops->pgo_reference(
  684                             entry->object.uvm_obj);
  685         }
  686 
  687         uvm_tree_sanity(map, "clip_start leave");
  688 }
  689 
  690 /*
  691  * uvm_map_clip_end: ensure that the entry ends at or before
  692  *      the ending address, if it does't we split the reference
  693  *
  694  * => caller should use UVM_MAP_CLIP_END macro rather than calling
  695  *    this directly
  696  * => map must be locked by caller
  697  */
  698 
  699 void
  700 uvm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry, vaddr_t end,
  701     struct uvm_mapent_reservation *umr)
  702 {
  703         struct vm_map_entry *   new_entry;
  704         vaddr_t new_adj; /* #bytes we move start forward */
  705 
  706         uvm_tree_sanity(map, "clip_end entry");
  707 
  708         /*
  709          *      Create a new entry and insert it
  710          *      AFTER the specified entry
  711          */
  712         new_entry = uvm_mapent_alloc_split(map, entry, 0, umr);
  713         uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
  714 
  715         new_entry->start = entry->end = end;
  716         new_adj = end - entry->start;
  717         if (new_entry->object.uvm_obj)
  718                 new_entry->offset += new_adj;
  719 
  720         if (entry->aref.ar_amap)
  721                 amap_splitref(&entry->aref, &new_entry->aref, new_adj);
  722 
  723         uvm_rb_fixup(map, entry);
  724 
  725         uvm_map_entry_link(map, entry, new_entry);
  726 
  727         if (UVM_ET_ISSUBMAP(entry)) {
  728                 /* ... unlikely to happen, but play it safe */
  729                 uvm_map_reference(new_entry->object.sub_map);
  730         } else {
  731                 if (UVM_ET_ISOBJ(entry) &&
  732                     entry->object.uvm_obj->pgops &&
  733                     entry->object.uvm_obj->pgops->pgo_reference)
  734                         entry->object.uvm_obj->pgops->pgo_reference(
  735                             entry->object.uvm_obj);
  736         }
  737 
  738         uvm_tree_sanity(map, "clip_end leave");
  739 }
  740 
  741 
  742 /*
  743  *   M A P   -   m a i n   e n t r y   p o i n t
  744  */
  745 /*
  746  * uvm_map: establish a valid mapping in a map
  747  *
  748  * => assume startp is page aligned.
  749  * => assume size is a multiple of PAGE_SIZE.
  750  * => assume sys_mmap provides enough of a "hint" to have us skip
  751  *      over text/data/bss area.
  752  * => map must be unlocked (we will lock it)
  753  * => <uobj,uoffset> value meanings (4 cases):
  754  *       [1] <NULL,uoffset>             == uoffset is a hint for PMAP_PREFER
  755  *       [2] <NULL,UVM_UNKNOWN_OFFSET>  == don't PMAP_PREFER
  756  *       [3] <uobj,uoffset>             == normal mapping
  757  *       [4] <uobj,UVM_UNKNOWN_OFFSET>  == uvm_map finds offset based on VA
  758  *
  759  *    case [4] is for kernel mappings where we don't know the offset until
  760  *    we've found a virtual address.   note that kernel object offsets are
  761  *    always relative to vm_map_min(kernel_map).
  762  *
  763  * => if `align' is non-zero, we align the virtual address to the specified
  764  *      alignment.
  765  *      this is provided as a mechanism for large pages.
  766  *
  767  * => XXXCDC: need way to map in external amap?
  768  */
  769 
  770 int
  771 uvm_map(struct vm_map *map, vaddr_t *startp /* IN/OUT */, vsize_t size,
  772     struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags)
  773 {
  774         struct uvm_map_args args;
  775         struct vm_map_entry *new_entry;
  776         int error;
  777 
  778         KASSERT((flags & UVM_FLAG_QUANTUM) == 0 || VM_MAP_IS_KERNEL(map));
  779 
  780         /*
  781          * for pager_map, allocate the new entry first to avoid sleeping
  782          * for memory while we have the map locked.
  783          *
  784          * besides, because we allocates entries for in-kernel maps
  785          * a bit differently (cf. uvm_kmapent_alloc/free), we need to
  786          * allocate them before locking the map.
  787          */
  788 
  789         new_entry = NULL;
  790         if (VM_MAP_USE_KMAPENT(map) || (flags & UVM_FLAG_QUANTUM) ||
  791             map == pager_map) {
  792                 new_entry = uvm_mapent_alloc(map, (flags & UVM_FLAG_NOWAIT));
  793                 if (__predict_false(new_entry == NULL))
  794                         return ENOMEM;
  795                 if (flags & UVM_FLAG_QUANTUM)
  796                         new_entry->flags |= UVM_MAP_QUANTUM;
  797         }
  798         if (map == pager_map)
  799                 flags |= UVM_FLAG_NOMERGE;
  800 
  801         error = uvm_map_prepare(map, *startp, size, uobj, uoffset, align,
  802             flags, &args);
  803         if (!error) {
  804                 error = uvm_map_enter(map, &args, new_entry);
  805                 *startp = args.uma_start;
  806         } else if (new_entry) {
  807                 uvm_mapent_free(new_entry);
  808         }
  809 
  810         return error;
  811 }
  812 
  813 int
  814 uvm_map_prepare(struct vm_map *map, vaddr_t start, vsize_t size,
  815     struct uvm_object *uobj, voff_t uoffset, vsize_t align, uvm_flag_t flags,
  816     struct uvm_map_args *args)
  817 {
  818         struct vm_map_entry *prev_entry;
  819         vm_prot_t prot = UVM_PROTECTION(flags);
  820         vm_prot_t maxprot = UVM_MAXPROTECTION(flags);
  821 
  822         UVMHIST_FUNC("uvm_map_prepare");
  823         UVMHIST_CALLED(maphist);
  824 
  825         UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)",
  826             map, start, size, flags);
  827         UVMHIST_LOG(maphist, "  uobj/offset 0x%x/%d", uobj, uoffset,0,0);
  828 
  829         /*
  830          * detect a popular device driver bug.
  831          */
  832 
  833         KASSERT(doing_shutdown || curlwp != NULL ||
  834             (map->flags & VM_MAP_INTRSAFE));
  835 
  836         /*
  837          * zero-sized mapping doesn't make any sense.
  838          */
  839         KASSERT(size > 0);
  840 
  841         KASSERT((~flags & (UVM_FLAG_NOWAIT | UVM_FLAG_WAITVA)) != 0);
  842 
  843         uvm_tree_sanity(map, "map entry");
  844 
  845         /*
  846          * check sanity of protection code
  847          */
  848 
  849         if ((prot & maxprot) != prot) {
  850                 UVMHIST_LOG(maphist, "<- prot. failure:  prot=0x%x, max=0x%x",
  851                 prot, maxprot,0,0);
  852                 return EACCES;
  853         }
  854 
  855         /*
  856          * figure out where to put new VM range
  857          */
  858 
  859 retry:
  860         if (vm_map_lock_try(map) == FALSE) {
  861                 if (flags & UVM_FLAG_TRYLOCK) {
  862                         return EAGAIN;
  863                 }
  864                 vm_map_lock(map); /* could sleep here */
  865         }
  866         if ((prev_entry = uvm_map_findspace(map, start, size, &start,
  867             uobj, uoffset, align, flags)) == NULL) {
  868                 unsigned int timestamp;
  869 
  870                 if ((flags & UVM_FLAG_WAITVA) == 0) {
  871                         UVMHIST_LOG(maphist,"<- uvm_map_findspace failed!",
  872                             0,0,0,0);
  873                         vm_map_unlock(map);
  874                         return ENOMEM;
  875                 }
  876                 timestamp = map->timestamp;
  877                 UVMHIST_LOG(maphist,"waiting va timestamp=0x%x",
  878                             timestamp,0,0,0);
  879                 simple_lock(&map->flags_lock);
  880                 map->flags |= VM_MAP_WANTVA;
  881                 simple_unlock(&map->flags_lock);
  882                 vm_map_unlock(map);
  883 
  884                 /*
  885                  * wait until someone does unmap.
  886                  * XXX fragile locking
  887                  */
  888 
  889                 simple_lock(&map->flags_lock);
  890                 while ((map->flags & VM_MAP_WANTVA) != 0 &&
  891                    map->timestamp == timestamp) {
  892                         ltsleep(&map->header, PVM, "vmmapva", 0,
  893                             &map->flags_lock);
  894                 }
  895                 simple_unlock(&map->flags_lock);
  896                 goto retry;
  897         }
  898 
  899 #ifdef PMAP_GROWKERNEL
  900         /*
  901          * If the kernel pmap can't map the requested space,
  902          * then allocate more resources for it.
  903          */
  904         if (map == kernel_map && uvm_maxkaddr < (start + size))
  905                 uvm_maxkaddr = pmap_growkernel(start + size);
  906 #endif
  907 
  908         UVMCNT_INCR(uvm_map_call);
  909 
  910         /*
  911          * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER
  912          * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET.   in
  913          * either case we want to zero it  before storing it in the map entry
  914          * (because it looks strange and confusing when debugging...)
  915          *
  916          * if uobj is not null
  917          *   if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping
  918          *      and we do not need to change uoffset.
  919          *   if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset
  920          *      now (based on the starting address of the map).   this case is
  921          *      for kernel object mappings where we don't know the offset until
  922          *      the virtual address is found (with uvm_map_findspace).   the
  923          *      offset is the distance we are from the start of the map.
  924          */
  925 
  926         if (uobj == NULL) {
  927                 uoffset = 0;
  928         } else {
  929                 if (uoffset == UVM_UNKNOWN_OFFSET) {
  930                         KASSERT(UVM_OBJ_IS_KERN_OBJECT(uobj));
  931                         uoffset = start - vm_map_min(kernel_map);
  932                 }
  933         }
  934 
  935         args->uma_flags = flags;
  936         args->uma_prev = prev_entry;
  937         args->uma_start = start;
  938         args->uma_size = size;
  939         args->uma_uobj = uobj;
  940         args->uma_uoffset = uoffset;
  941 
  942         return 0;
  943 }
  944 
  945 int
  946 uvm_map_enter(struct vm_map *map, const struct uvm_map_args *args,
  947     struct vm_map_entry *new_entry)
  948 {
              /*
               * uvm_map_enter: insert the range described by "args" into "map".
               *
               * => "args" is only read here; the caller visible in this file
               *    passes the structure filled in by uvm_map_prepare().
               * => the map is unlocked at "done" on every return path, so the
               *    caller is expected to hand the map over locked.
               * => first tries to extend the entry in front of the range
               *    (back merge), then the entry behind it (forward merge); a
               *    new entry is linked in only if neither merge happened.
               * => "new_entry" may be NULL unless UVM_FLAG_QUANTUM is set; an
               *    entry that ends up unused is freed before returning.
               * => returns 0 on success, ENOMEM if a map entry or amap cannot
               *    be allocated, or the error reported by amap_extend().
               */
  949         struct vm_map_entry *prev_entry = args->uma_prev;
  950         struct vm_map_entry *dead = NULL;
  951 
  952         const uvm_flag_t flags = args->uma_flags;
  953         const vm_prot_t prot = UVM_PROTECTION(flags);
  954         const vm_prot_t maxprot = UVM_MAXPROTECTION(flags);
  955         const vm_inherit_t inherit = UVM_INHERIT(flags);
  956         const int amapwaitflag = (flags & UVM_FLAG_NOWAIT) ?
  957             AMAP_EXTEND_NOWAIT : 0;
  958         const int advice = UVM_ADVICE(flags);
              /*
               * entry flags that have to match before merging: a neighbour is
               * skipped when (its flags & meflagmask) != meflagval.
               */
  959         const int meflagmask = UVM_MAP_NOMERGE | UVM_MAP_QUANTUM;
  960         const int meflagval = (flags & UVM_FLAG_QUANTUM) ?
  961             UVM_MAP_QUANTUM : 0;
  962 
  963         vaddr_t start = args->uma_start;
  964         vsize_t size = args->uma_size;
  965         struct uvm_object *uobj = args->uma_uobj;
  966         voff_t uoffset = args->uma_uoffset;
  967 
              /* kmap only selects which set of statistics counters is bumped */
  968         const int kmap = (vm_map_pmap(map) == pmap_kernel());
              /* number of merges performed so far: 0, 1 or 2 */
  969         int merged = 0;
  970         int error;
  971         int newetype;
  972 
  973         UVMHIST_FUNC("uvm_map_enter");
  974         UVMHIST_CALLED(maphist);
  975 
  976         UVMHIST_LOG(maphist, "(map=0x%x, start=0x%x, size=%d, flags=0x%x)",
  977             map, start, size, flags);
  978         UVMHIST_LOG(maphist, "  uobj/offset 0x%x/%d", uobj, uoffset,0,0);
  979 
              /* quantum mappings require a preallocated, already-marked entry */
  980         if (flags & UVM_FLAG_QUANTUM) {
  981                 KASSERT(new_entry);
  982                 KASSERT(new_entry->flags & UVM_MAP_QUANTUM);
  983         }
  984 
              /* work out the entry type the new range would have */
  985         if (uobj)
  986                 newetype = UVM_ET_OBJ;
  987         else
  988                 newetype = 0;
  989 
  990         if (flags & UVM_FLAG_COPYONW) {
  991                 newetype |= UVM_ET_COPYONWRITE;
  992                 if ((flags & UVM_FLAG_OVERLAY) == 0)
  993                         newetype |= UVM_ET_NEEDSCOPY;
  994         }
  995 
  996         /*
  997          * try and insert in map by extending previous entry, if possible.
  998          * XXX: we don't try and pull back the next entry.   might be useful
  999          * for a stack, but we are currently allocating our stack in advance.
 1000          */
 1001 
 1002         if (flags & UVM_FLAG_NOMERGE)
 1003                 goto nomerge;
 1004 
              /*
               * back merge: every mismatch below falls through to the forward
               * merge attempt rather than giving up on merging altogether.
               */
 1005         if (prev_entry->etype == newetype &&
 1006             prev_entry->end == start &&
 1007             prev_entry != &map->header &&
 1008             prev_entry->object.uvm_obj == uobj) {
 1009 
 1010                 if ((prev_entry->flags & meflagmask) != meflagval)
 1011                         goto forwardmerge;
 1012 
                      /* object offsets must be contiguous across the join */
 1013                 if (uobj && prev_entry->offset +
 1014                     (prev_entry->end - prev_entry->start) != uoffset)
 1015                         goto forwardmerge;
 1016 
 1017                 if (prev_entry->protection != prot ||
 1018                     prev_entry->max_protection != maxprot)
 1019                         goto forwardmerge;
 1020 
 1021                 if (prev_entry->inheritance != inherit ||
 1022                     prev_entry->advice != advice)
 1023                         goto forwardmerge;
 1024 
 1025                 /* wiring status must match (new area is unwired) */
 1026                 if (VM_MAPENT_ISWIRED(prev_entry))
 1027                         goto forwardmerge;
 1028 
 1029                 /*
 1030                  * can't extend a shared amap.  note: no need to lock amap to
 1031                  * look at refs since we don't care about its exact value.
 1032                  * if it is one (i.e. we have only reference) it will stay there
 1033                  */
 1034 
 1035                 if (prev_entry->aref.ar_amap &&
 1036                     amap_refs(prev_entry->aref.ar_amap) != 1) {
 1037                         goto forwardmerge;
 1038                 }
 1039 
                      /* a failed amap extension here fails the whole request */
 1040                 if (prev_entry->aref.ar_amap) {
 1041                         error = amap_extend(prev_entry, size,
 1042                             amapwaitflag | AMAP_EXTEND_FORWARDS);
 1043                         if (error)
 1044                                 goto done;
 1045                 }
 1046 
 1047                 if (kmap)
 1048                         UVMCNT_INCR(map_kbackmerge);
 1049                 else
 1050                         UVMCNT_INCR(map_ubackmerge);
 1051                 UVMHIST_LOG(maphist,"  starting back merge", 0, 0, 0, 0);
 1052 
 1053                 /*
 1054                  * drop our reference to uobj since we are extending a reference
 1055                  * that we already have (the ref count can not drop to zero).
 1056                  */
 1057 
 1058                 if (uobj && uobj->pgops->pgo_detach)
 1059                         uobj->pgops->pgo_detach(uobj);
 1060 
 1061                 prev_entry->end += size;
 1062                 uvm_rb_fixup(map, prev_entry);
 1063 
 1064                 uvm_tree_sanity(map, "map backmerged");
 1065 
 1066                 UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0);
 1067                 merged++;
 1068         }
 1069 
              /*
               * forward merge: attempted whether or not the back merge
               * happened; a mismatch here jumps to "nomerge", where a new
               * entry is created only if nothing was merged at all.
               */
 1070 forwardmerge:
 1071         if (prev_entry->next->etype == newetype &&
 1072             prev_entry->next->start == (start + size) &&
 1073             prev_entry->next != &map->header &&
 1074             prev_entry->next->object.uvm_obj == uobj) {
 1075 
 1076                 if ((prev_entry->next->flags & meflagmask) != meflagval)
 1077                         goto nomerge;
 1078 
 1079                 if (uobj && prev_entry->next->offset != uoffset + size)
 1080                         goto nomerge;
 1081 
 1082                 if (prev_entry->next->protection != prot ||
 1083                     prev_entry->next->max_protection != maxprot)
 1084                         goto nomerge;
 1085 
 1086                 if (prev_entry->next->inheritance != inherit ||
 1087                     prev_entry->next->advice != advice)
 1088                         goto nomerge;
 1089 
 1090                 /* wiring status must match (new area is unwired) */
 1091                 if (VM_MAPENT_ISWIRED(prev_entry->next))
 1092                         goto nomerge;
 1093 
 1094                 /*
 1095                  * can't extend a shared amap.  note: no need to lock amap to
 1096                  * look at refs since we don't care about its exact value.
 1097                  * if it is one (i.e. we have only reference) it will stay there.
 1098                  *
 1099                  * note that we also can't merge two amaps, so if we
 1100                  * merged with the previous entry which has an amap,
 1101                  * and the next entry also has an amap, we give up.
 1102                  *
 1103                  * Interesting cases:
 1104                  * amap, new, amap -> give up second merge (single fwd extend)
 1105                  * amap, new, none -> double forward extend (extend again here)
 1106                  * none, new, amap -> double backward extend (done here)
 1107                  * uobj, new, amap -> single backward extend (done here)
 1108                  *
 1109                  * XXX should we attempt to deal with someone refilling
 1110                  * the deallocated region between two entries that are
 1111                  * backed by the same amap (ie, arefs is 2, "prev" and
 1112                  * "next" refer to it, and adding this allocation will
 1113                  * close the hole, thus restoring arefs to 1 and
 1114                  * deallocating the "next" vm_map_entry)?  -- @@@
 1115                  */
 1116 
 1117                 if (prev_entry->next->aref.ar_amap &&
 1118                     (amap_refs(prev_entry->next->aref.ar_amap) != 1 ||
 1119                      (merged && prev_entry->aref.ar_amap))) {
 1120                         goto nomerge;
 1121                 }
 1122 
 1123                 if (merged) {
 1124                         /*
 1125                          * Try to extend the amap of the previous entry to
 1126                          * cover the next entry as well.  If it doesn't work
 1127                          * just skip on, don't actually give up, since we've
 1128                          * already completed the back merge.
 1129                          */
 1130                         if (prev_entry->aref.ar_amap) {
 1131                                 if (amap_extend(prev_entry,
 1132                                     prev_entry->next->end -
 1133                                     prev_entry->next->start,
 1134                                     amapwaitflag | AMAP_EXTEND_FORWARDS))
 1135                                         goto nomerge;
 1136                         }
 1137 
 1138                         /*
 1139                          * Try to extend the amap of the *next* entry
 1140                          * back to cover the new allocation *and* the
 1141                          * previous entry as well (the previous merge
 1142                          * didn't have an amap already otherwise we
 1143                          * wouldn't be checking here for an amap).  If
 1144                          * it doesn't work just skip on, again, don't
 1145                          * actually give up, since we've already
 1146                          * completed the back merge.
 1147                          */
 1148                         else if (prev_entry->next->aref.ar_amap) {
 1149                                 if (amap_extend(prev_entry->next,
 1150                                     prev_entry->end -
 1151                                     prev_entry->start,
 1152                                     amapwaitflag | AMAP_EXTEND_BACKWARDS))
 1153                                         goto nomerge;
 1154                         }
 1155                 } else {
 1156                         /*
 1157                          * Pull the next entry's amap backwards to cover this
 1158                          * new allocation.
 1159                          */
 1160                         if (prev_entry->next->aref.ar_amap) {
 1161                                 error = amap_extend(prev_entry->next, size,
 1162                                     amapwaitflag | AMAP_EXTEND_BACKWARDS);
 1163                                 if (error)
 1164                                         goto done;
 1165                         }
 1166                 }
 1167 
                      /*
                       * statistics: a merge on both sides is counted as one
                       * "bimerge", so the back merge counted earlier is undone.
                       */
 1168                 if (merged) {
 1169                         if (kmap) {
 1170                                 UVMCNT_DECR(map_kbackmerge);
 1171                                 UVMCNT_INCR(map_kbimerge);
 1172                         } else {
 1173                                 UVMCNT_DECR(map_ubackmerge);
 1174                                 UVMCNT_INCR(map_ubimerge);
 1175                         }
 1176                 } else {
 1177                         if (kmap)
 1178                                 UVMCNT_INCR(map_kforwmerge);
 1179                         else
 1180                                 UVMCNT_INCR(map_uforwmerge);
 1181                 }
 1182                 UVMHIST_LOG(maphist,"  starting forward merge", 0, 0, 0, 0);
 1183 
 1184                 /*
 1185                  * drop our reference to uobj since we are extending a reference
 1186                  * that we already have (the ref count can not drop to zero).
 1187                  * (if merged, we've already detached)
 1188                  */
 1189                 if (uobj && uobj->pgops->pgo_detach && !merged)
 1190                         uobj->pgops->pgo_detach(uobj);
 1191 
 1192                 if (merged) {
                              /*
                               * both sides merged: prev_entry swallows the
                               * next entry, which is unlinked here and freed
                               * after the map lock is dropped (see "done").
                               * If the next entry carried the amap, it moves
                               * over to prev_entry.
                               */
 1193                         dead = prev_entry->next;
 1194                         prev_entry->end = dead->end;
 1195                         uvm_map_entry_unlink(map, dead);
 1196                         if (dead->aref.ar_amap != NULL) {
 1197                                 prev_entry->aref = dead->aref;
 1198                                 dead->aref.ar_amap = NULL;
 1199                         }
 1200                 } else {
                              /*
                               * forward merge only: grow the next entry
                               * downwards so that it starts at the new range.
                               */
 1201                         prev_entry->next->start -= size;
 1202                         if (prev_entry != &map->header)
 1203                                 uvm_rb_fixup(map, prev_entry);
 1204                         if (uobj)
 1205                                 prev_entry->next->offset = uoffset;
 1206                 }
 1207 
 1208                 uvm_tree_sanity(map, "map forwardmerged");
 1209 
 1210                 UVMHIST_LOG(maphist,"<- done forwardmerge", 0, 0, 0, 0);
 1211                 merged++;
 1212         }
 1213 
 1214 nomerge:
 1215         if (!merged) {
 1216                 UVMHIST_LOG(maphist,"  allocating new map entry", 0, 0, 0, 0);
 1217                 if (kmap)
 1218                         UVMCNT_INCR(map_knomerge);
 1219                 else
 1220                         UVMCNT_INCR(map_unomerge);
 1221 
 1222                 /*
 1223                  * allocate new entry and link it in.
 1224                  */
 1225 
                      /* use the caller's preallocated entry when there is one */
 1226                 if (new_entry == NULL) {
 1227                         new_entry = uvm_mapent_alloc(map,
 1228                                 (flags & UVM_FLAG_NOWAIT));
 1229                         if (__predict_false(new_entry == NULL)) {
 1230                                 error = ENOMEM;
 1231                                 goto done;
 1232                         }
 1233                 }
 1234                 new_entry->start = start;
 1235                 new_entry->end = new_entry->start + size;
 1236                 new_entry->object.uvm_obj = uobj;
 1237                 new_entry->offset = uoffset;
 1238 
 1239                 new_entry->etype = newetype;
 1240 
 1241                 if (flags & UVM_FLAG_NOMERGE) {
 1242                         new_entry->flags |= UVM_MAP_NOMERGE;
 1243                 }
 1244 
 1245                 new_entry->protection = prot;
 1246                 new_entry->max_protection = maxprot;
 1247                 new_entry->inheritance = inherit;
 1248                 new_entry->wired_count = 0;
 1249                 new_entry->advice = advice;
 1250                 if (flags & UVM_FLAG_OVERLAY) {
 1251 
 1252                         /*
 1253                          * to_add: for BSS we overallocate a little since we
 1254                          * are likely to extend
 1255                          */
 1256 
 1257                         vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ?
 1258                                 UVM_AMAP_CHUNK << PAGE_SHIFT : 0;
 1259                         struct vm_amap *amap = amap_alloc(size, to_add,
 1260                             (flags & UVM_FLAG_NOWAIT) ? M_NOWAIT : M_WAITOK);
                              /*
                               * on failure new_entry is still set, so it is
                               * released at "done" with error != 0.
                               */
 1261                         if (__predict_false(amap == NULL)) {
 1262                                 error = ENOMEM;
 1263                                 goto done;
 1264                         }
 1265                         new_entry->aref.ar_pageoff = 0;
 1266                         new_entry->aref.ar_amap = amap;
 1267                 } else {
 1268                         new_entry->aref.ar_pageoff = 0;
 1269                         new_entry->aref.ar_amap = NULL;
 1270                 }
 1271                 uvm_map_entry_link(map, prev_entry, new_entry);
 1272 
 1273                 /*
 1274                  * Update the free space hint
 1275                  */
 1276 
 1277                 if ((map->first_free == prev_entry) &&
 1278                     (prev_entry->end >= new_entry->start))
 1279                         map->first_free = new_entry;
 1280 
                      /* the entry is linked into the map; don't free it below */
 1281                 new_entry = NULL;
 1282         }
 1283 
              /* reached for merged and newly linked ranges alike */
 1284         map->size += size;
 1285 
 1286         UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
 1287 
 1288         error = 0;
              /*
               * common exit: drop the map lock first, then release whatever
               * entry was left over.
               */
 1289 done:
 1290         vm_map_unlock(map);
 1291         if (new_entry) {
                      /*
                       * success with new_entry still set means a preallocated
                       * entry went unused because the range was merged.
                       */
 1292                 if (error == 0) {
 1293                         KDASSERT(merged);
 1294                         uvm_mapent_free_merged(map, new_entry);
 1295                 } else {
 1296                         uvm_mapent_free(new_entry);
 1297                 }
 1298         }
              /* entry unlinked by the two-sided merge above */
 1299         if (dead) {
 1300                 KDASSERT(merged);
 1301                 uvm_mapent_free_merged(map, dead);
 1302         }
 1303         return error;
 1304 }
 1305 
 1306 /*
 1307  * uvm_map_lookup_entry: find map entry at or before an address
 1308  *
 1309  * => map must at least be read-locked by caller
 1310  * => entry is returned in "entry"
 1311  * => return value is true if address is in the returned entry
 1312  */
 1313 
 1314 boolean_t
 1315 uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
 1316     struct vm_map_entry **entry /* OUT */)
 1317 {
 1318         struct vm_map_entry *cur;
 1319         boolean_t use_tree = FALSE;
 1320         UVMHIST_FUNC("uvm_map_lookup_entry");
 1321         UVMHIST_CALLED(maphist);
 1322 
 1323         UVMHIST_LOG(maphist,"(map=0x%x,addr=0x%x,ent=0x%x)",
 1324             map, address, entry, 0);
 1325 
 1326         /*
 1327          * start looking either from the head of the
 1328          * list, or from the hint.
 1329          */
 1330 
 1331         simple_lock(&map->hint_lock);
 1332         cur = map->hint;
 1333         simple_unlock(&map->hint_lock);
 1334 
 1335         if (cur == &map->header)
 1336                 cur = cur->next;
 1337 
 1338         UVMCNT_INCR(uvm_mlk_call);
 1339         if (address >= cur->start) {
 1340 
 1341                 /*
 1342                  * go from hint to end of list.
 1343                  *
 1344                  * but first, make a quick check to see if
 1345                  * we are already looking at the entry we
 1346                  * want (which is usually the case).
 1347                  * note also that we don't need to save the hint
 1348                  * here... it is the same hint (unless we are
 1349                  * at the header, in which case the hint didn't
 1350                  * buy us anything anyway).
 1351                  */
 1352 
 1353                 if (cur != &map->header && cur->end > address) {
 1354                         UVMCNT_INCR(uvm_mlk_hint);
 1355                         *entry = cur;
 1356                         UVMHIST_LOG(maphist,"<- got it via hint (0x%x)",
 1357                             cur, 0, 0, 0);
 1358                         return (TRUE);
 1359                 }
 1360 
 1361                 if (map->nentries > 30)
 1362                         use_tree = TRUE;
 1363         } else {
 1364 
 1365                 /*
 1366                  * invalid hint.  use tree.
 1367                  */
 1368                 use_tree = TRUE;
 1369         }
 1370 
 1371         uvm_tree_sanity(map, __func__);
 1372 
 1373         if (use_tree) {
 1374                 struct vm_map_entry *prev = &map->header;
 1375                 cur = RB_ROOT(&map->rbhead);
 1376 
 1377                 /*
 1378                  * Simple lookup in the tree.  Happens when the hint is
 1379                  * invalid, or nentries reach a threshold.
 1380                  */
 1381                 while (cur) {
 1382                         if (address >= cur->start) {
 1383                                 if (address < cur->end) {
 1384                                         *entry = cur;
 1385                                         goto got;
 1386                                 }
 1387                                 prev = cur;
 1388                                 cur = RB_RIGHT(cur, rb_entry);
 1389                         } else
 1390                                 cur = RB_LEFT(cur, rb_entry);
 1391                 }
 1392                 *entry = prev;
 1393                 goto failed;
 1394         }
 1395 
 1396         /*
 1397          * search linearly
 1398          */
 1399 
 1400         while (cur != &map->header) {
 1401                 if (cur->end > address) {
 1402                         if (address >= cur->start) {
 1403                                 /*
 1404                                  * save this lookup for future
 1405                                  * hints, and return
 1406                                  */
 1407 
 1408                                 *entry = cur;
 1409 got:
 1410                                 SAVE_HINT(map, map->hint, *entry);
 1411                                 UVMHIST_LOG(maphist,"<- search got it (0x%x)",
 1412                                         cur, 0, 0, 0);
 1413                                 KDASSERT((*entry)->start <= address);
 1414                                 KDASSERT(address < (*entry)->end);
 1415                                 return (TRUE);
 1416                         }
 1417                         break;
 1418                 }
 1419                 cur = cur->next;
 1420         }
 1421         *entry = cur->prev;
 1422 failed:
 1423         SAVE_HINT(map, map->hint, *entry);
 1424         UVMHIST_LOG(maphist,"<- failed!",0,0,0,0);
 1425         KDASSERT((*entry) == &map->header || (*entry)->end <= address);
 1426         KDASSERT((*entry)->next == &map->header ||
 1427             address < (*entry)->next->start);
 1428         return (FALSE);
 1429 }
 1430 
 1431 /*
 1432  * See if the range between start and start + length fits in the gap
 1433  * entry->next->start and entry->end.  Returns 1 if fits, 0 if doesn't
 1434  * fit, and -1 address wraps around.
 1435  */
 1436 static __inline int
 1437 uvm_map_space_avail(vaddr_t *start, vsize_t length, voff_t uoffset,
 1438     vsize_t align, int topdown, struct vm_map_entry *entry)
 1439 {
 1440         vaddr_t end;
 1441 
 1442 #ifdef PMAP_PREFER
 1443         /*
 1444          * push start address forward as needed to avoid VAC alias problems.
 1445          * we only do this if a valid offset is specified.
 1446          */
 1447 
 1448         if (uoffset != UVM_UNKNOWN_OFFSET)
 1449                 PMAP_PREFER(uoffset, start, length, topdown);
 1450 #endif
 1451         if (align != 0) {
 1452                 if ((*start & (align - 1)) != 0) {
 1453                         if (topdown)
 1454                                 *start &= ~(align - 1);
 1455                         else
 1456                                 *start = roundup(*start, align);
 1457                 }
 1458                 /*
 1459                  * XXX Should we PMAP_PREFER() here again?
 1460                  * eh...i think we're okay
 1461                  */
 1462         }
 1463 
 1464         /*
 1465          * Find the end of the proposed new region.  Be sure we didn't
 1466          * wrap around the address; if so, we lose.  Otherwise, if the
 1467          * proposed new region fits before the next entry, we win.
 1468          */
 1469 
 1470         end = *start + length;
 1471         if (end < *start)
 1472                 return (-1);
 1473 
 1474         if (entry->next->start >= end && *start >= entry->end)
 1475                 return (1);
 1476 
 1477         return (0);
 1478 }
 1479 
 1480 /*
 1481  * uvm_map_findspace: find "length" sized space in "map".
 1482  *
 1483  * => "hint" is a hint about where we want it, unless UVM_FLAG_FIXED is
 1484  *      set in "flags" (in which case we insist on using "hint").
 1485  * => "result" is VA returned
 1486  * => uobj/uoffset are to be used to handle VAC alignment, if required
 1487  * => if "align" is non-zero, we attempt to align to that value.
 1488  * => caller must at least have read-locked map
 1489  * => returns NULL on failure, or pointer to prev. map entry if success
 1490  * => note this is a cross between the old vm_map_findspace and vm_map_find
 1491  */
 1492 
 1493 struct vm_map_entry *
 1494 uvm_map_findspace(struct vm_map *map, vaddr_t hint, vsize_t length,
 1495     vaddr_t *result /* OUT */, struct uvm_object *uobj, voff_t uoffset,
 1496     vsize_t align, int flags)
 1497 {
 1498         struct vm_map_entry *entry;
 1499         struct vm_map_entry *child, *prev, *tmp;
 1500         vaddr_t orig_hint;
 1501         const int topdown = map->flags & VM_MAP_TOPDOWN;
 1502         UVMHIST_FUNC("uvm_map_findspace");
 1503         UVMHIST_CALLED(maphist);
 1504 
 1505         UVMHIST_LOG(maphist, "(map=0x%x, hint=0x%x, len=%d, flags=0x%x)",
 1506             map, hint, length, flags);
 1507         KASSERT((align & (align - 1)) == 0);
 1508         KASSERT((flags & UVM_FLAG_FIXED) == 0 || align == 0);
 1509 
 1510         uvm_tree_sanity(map, "map_findspace entry");
 1511 
 1512         /*
 1513          * remember the original hint.  if we are aligning, then we
 1514          * may have to try again with no alignment constraint if
 1515          * we fail the first time.
 1516          */
 1517 
 1518         orig_hint = hint;
 1519         if (hint < vm_map_min(map)) {   /* check ranges ... */
 1520                 if (flags & UVM_FLAG_FIXED) {
 1521                         UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0);
 1522                         return (NULL);
 1523                 }
 1524                 hint = vm_map_min(map);
 1525         }
 1526         if (hint > vm_map_max(map)) {
 1527                 UVMHIST_LOG(maphist,"<- VA 0x%x > range [0x%x->0x%x]",
 1528                     hint, vm_map_min(map), vm_map_max(map), 0);
 1529                 return (NULL);
 1530         }
 1531 
 1532         /*
 1533          * Look for the first possible address; if there's already
 1534          * something at this address, we have to start after it.
 1535          */
 1536 
 1537         /*
 1538          * @@@: there are four, no, eight cases to consider.
 1539          *
 1540          * 0: found,     fixed,     bottom up -> fail
 1541          * 1: found,     fixed,     top down  -> fail
 1542          * 2: found,     not fixed, bottom up -> start after entry->end,
 1543          *                                       loop up
 1544          * 3: found,     not fixed, top down  -> start before entry->start,
 1545          *                                       loop down
 1546          * 4: not found, fixed,     bottom up -> check entry->next->start, fail
 1547          * 5: not found, fixed,     top down  -> check entry->next->start, fail
 1548          * 6: not found, not fixed, bottom up -> check entry->next->start,
 1549          *                                       loop up
 1550          * 7: not found, not fixed, top down  -> check entry->next->start,
 1551          *                                       loop down
 1552          *
 1553          * as you can see, it reduces to roughly five cases, and that
 1554          * adding top down mapping only adds one unique case (without
 1555          * it, there would be four cases).
 1556          */
 1557 
 1558         if ((flags & UVM_FLAG_FIXED) == 0 && hint == vm_map_min(map)) {
 1559                 entry = map->first_free;
 1560         } else {
 1561                 if (uvm_map_lookup_entry(map, hint, &entry)) {
 1562                         /* "hint" address already in use ... */
 1563                         if (flags & UVM_FLAG_FIXED) {
 1564                                 UVMHIST_LOG(maphist, "<- fixed & VA in use",
 1565                                     0, 0, 0, 0);
 1566                                 return (NULL);
 1567                         }
 1568                         if (topdown)
 1569                                 /* Start from lower gap. */
 1570                                 entry = entry->prev;
 1571                 } else if (flags & UVM_FLAG_FIXED) {
 1572                         if (entry->next->start >= hint + length &&
 1573                             hint + length > hint)
 1574                                 goto found;
 1575 
 1576                         /* "hint" address is gap but too small */
 1577                         UVMHIST_LOG(maphist, "<- fixed mapping failed",
 1578                             0, 0, 0, 0);
 1579                         return (NULL); /* only one shot at it ... */
 1580                 } else {
 1581                         /*
 1582                          * See if given hint fits in this gap.
 1583                          */
 1584                         switch (uvm_map_space_avail(&hint, length,
 1585                             uoffset, align, topdown, entry)) {
 1586                         case 1:
 1587                                 goto found;
 1588                         case -1:
 1589                                 goto wraparound;
 1590                         }
 1591 
 1592                         if (topdown) {
 1593                                 /*
 1594                                  * Still there is a chance to fit
 1595                                  * if hint > entry->end.
 1596                                  */
 1597                         } else {
 1598                                 /* Start from higher gap. */
 1599                                 entry = entry->next;
 1600                                 if (entry == &map->header)
 1601                                         goto notfound;
 1602                                 goto nextgap;
 1603                         }
 1604                 }
 1605         }
 1606 
 1607         /*
 1608          * Note that every UVM_FLAG_FIXED case has already been handled.
 1609          */
 1610         KDASSERT((flags & UVM_FLAG_FIXED) == 0);
 1611 
 1612         /* Try to find the space in the red-black tree */
 1613 
 1614         /* Check slot before any entry */
 1615         hint = topdown ? entry->next->start - length : entry->end;
 1616         switch (uvm_map_space_avail(&hint, length, uoffset, align,
 1617             topdown, entry)) {
 1618         case 1:
 1619                 goto found;
 1620         case -1:
 1621                 goto wraparound;
 1622         }
 1623 
 1624 nextgap:
 1625         KDASSERT((flags & UVM_FLAG_FIXED) == 0);
 1626         /* If there is not enough space in the whole tree, we fail */
 1627         tmp = RB_ROOT(&map->rbhead);
 1628         if (tmp == NULL || tmp->space < length)
 1629                 goto notfound;
 1630 
 1631         prev = NULL; /* previous candidate */
 1632 
 1633         /* Find an entry close to hint that has enough space */
 1634         for (; tmp;) {
 1635                 KASSERT(tmp->next->start == tmp->end + tmp->ownspace);
 1636                 if (topdown) {
 1637                         if (tmp->next->start < hint + length &&
 1638                             (prev == NULL || tmp->end > prev->end)) {
 1639                                 if (tmp->ownspace >= length)
 1640                                         prev = tmp;
 1641                                 else if ((child = RB_LEFT(tmp, rb_entry))
 1642                                     != NULL && child->space >= length)
 1643                                         prev = tmp;
 1644                         }
 1645                 } else {
 1646                         if (tmp->end >= hint &&
 1647                             (prev == NULL || tmp->end < prev->end)) {
 1648                                 if (tmp->ownspace >= length)
 1649                                         prev = tmp;
 1650                                 else if ((child = RB_RIGHT(tmp, rb_entry))
 1651                                     != NULL && child->space >= length)
 1652                                         prev = tmp;
 1653                         }
 1654                 }
 1655                 if (tmp->next->start < hint + length)
 1656                         child = RB_RIGHT(tmp, rb_entry);
 1657                 else if (tmp->end > hint)
 1658                         child = RB_LEFT(tmp, rb_entry);
 1659                 else {
 1660                         if (tmp->ownspace >= length)
 1661                                 break;
 1662                         if (topdown)
 1663                                 child = RB_LEFT(tmp, rb_entry);
 1664                         else
 1665                                 child = RB_RIGHT(tmp, rb_entry);
 1666                 }
 1667                 if (child == NULL || child->space < length)
 1668                         break;
 1669                 tmp = child;
 1670         }
 1671 
 1672         if (tmp != NULL && tmp->start < hint && hint < tmp->next->start) {
 1673                 /*
 1674                  * Check if the entry that we found satisfies the
 1675                  * space requirement
 1676                  */
 1677                 if (topdown) {
 1678                         if (hint > tmp->next->start - length)
 1679                                 hint = tmp->next->start - length;
 1680                 } else {
 1681                         if (hint < tmp->end)
 1682                                 hint = tmp->end;
 1683                 }
 1684                 switch (uvm_map_space_avail(&hint, length, uoffset, align,
 1685                     topdown, tmp)) {
 1686                 case 1:
 1687                         entry = tmp;
 1688                         goto found;
 1689                 case -1:
 1690                         goto wraparound;
 1691                 }
 1692                 if (tmp->ownspace >= length)
 1693                         goto listsearch;
 1694         }
 1695         if (prev == NULL)
 1696                 goto notfound;
 1697 
 1698         if (topdown) {
 1699                 KASSERT(orig_hint >= prev->next->start - length ||
 1700                     prev->next->start - length > prev->next->start);
 1701                 hint = prev->next->start - length;
 1702         } else {
 1703                 KASSERT(orig_hint <= prev->end);
 1704                 hint = prev->end;
 1705         }
 1706         switch (uvm_map_space_avail(&hint, length, uoffset, align,
 1707             topdown, prev)) {
 1708         case 1:
 1709                 entry = prev;
 1710                 goto found;
 1711         case -1:
 1712                 goto wraparound;
 1713         }
 1714         if (prev->ownspace >= length)
 1715                 goto listsearch;
 1716 
 1717         if (topdown)
 1718                 tmp = RB_LEFT(prev, rb_entry);
 1719         else
 1720                 tmp = RB_RIGHT(prev, rb_entry);
 1721         for (;;) {
 1722                 KASSERT(tmp && tmp->space >= length);
 1723                 if (topdown)
 1724                         child = RB_RIGHT(tmp, rb_entry);
 1725                 else
 1726                         child = RB_LEFT(tmp, rb_entry);
 1727                 if (child && child->space >= length) {
 1728                         tmp = child;
 1729                         continue;
 1730                 }
 1731                 if (tmp->ownspace >= length)
 1732                         break;
 1733                 if (topdown)
 1734                         tmp = RB_LEFT(tmp, rb_entry);
 1735                 else
 1736                         tmp = RB_RIGHT(tmp, rb_entry);
 1737         }
 1738 
 1739         if (topdown) {
 1740                 KASSERT(orig_hint >= tmp->next->start - length ||
 1741                     tmp->next->start - length > tmp->next->start);
 1742                 hint = tmp->next->start - length;
 1743         } else {
 1744                 KASSERT(orig_hint <= tmp->end);
 1745                 hint = tmp->end;
 1746         }
 1747         switch (uvm_map_space_avail(&hint, length, uoffset, align,
 1748             topdown, tmp)) {
 1749         case 1:
 1750                 entry = tmp;
 1751                 goto found;
 1752         case -1:
 1753                 goto wraparound;
 1754         }
 1755 
 1756         /*
 1757          * The tree fails to find an entry because of offset or alignment
 1758          * restrictions.  Search the list instead.
 1759          */
 1760  listsearch:
 1761         /*
 1762          * Look through the rest of the map, trying to fit a new region in
 1763          * the gap between existing regions, or after the very last region.
 1764          * note: entry->end = base VA of current gap,
 1765          *       entry->next->start = VA of end of current gap
 1766          */
 1767 
 1768         for (;;) {
 1769                 /* Update hint for current gap. */
 1770                 hint = topdown ? entry->next->start - length : entry->end;
 1771 
 1772                 /* See if it fits. */
 1773                 switch (uvm_map_space_avail(&hint, length, uoffset, align,
 1774                     topdown, entry)) {
 1775                 case 1:
 1776                         goto found;
 1777                 case -1:
 1778                         goto wraparound;
 1779                 }
 1780 
 1781                 /* Advance to next/previous gap */
 1782                 if (topdown) {
 1783                         if (entry == &map->header) {
 1784                                 UVMHIST_LOG(maphist, "<- failed (off start)",
 1785                                     0,0,0,0);
 1786                                 goto notfound;
 1787                         }
 1788                         entry = entry->prev;
 1789                 } else {
 1790                         entry = entry->next;
 1791                         if (entry == &map->header) {
 1792                                 UVMHIST_LOG(maphist, "<- failed (off end)",
 1793                                     0,0,0,0);
 1794                                 goto notfound;
 1795                         }
 1796                 }
 1797         }
 1798 
 1799  found:
 1800         SAVE_HINT(map, map->hint, entry);
 1801         *result = hint;
 1802         UVMHIST_LOG(maphist,"<- got it!  (result=0x%x)", hint, 0,0,0);
 1803         KASSERT( topdown || hint >= orig_hint);
 1804         KASSERT(!topdown || hint <= orig_hint);
 1805         KASSERT(entry->end <= hint);
 1806         KASSERT(hint + length <= entry->next->start);
 1807         return (entry);
 1808 
 1809  wraparound:
 1810         UVMHIST_LOG(maphist, "<- failed (wrap around)", 0,0,0,0);
 1811 
 1812         return (NULL);
 1813 
 1814  notfound:
 1815         UVMHIST_LOG(maphist, "<- failed (notfound)", 0,0,0,0);
 1816 
 1817         return (NULL);
 1818 }
 1819 
 1820 /*
 1821  *   U N M A P   -   m a i n   h e l p e r   f u n c t i o n s
 1822  */
 1823 
 1824 /*
 1825  * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop")
 1826  *
 1827  * => caller must check alignment and size
 1828  * => map must be locked by caller
 1829  * => we return a list of map entries that we've remove from the map
 1830  *    in "entry_list"
 1831  */
 1832 
 1833 void
 1834 uvm_unmap_remove(struct vm_map *map, vaddr_t start, vaddr_t end,
 1835     struct vm_map_entry **entry_list /* OUT */,
 1836     struct uvm_mapent_reservation *umr)
 1837 {
 1838         struct vm_map_entry *entry, *first_entry, *next;
 1839         vaddr_t len;
 1840         UVMHIST_FUNC("uvm_unmap_remove"); UVMHIST_CALLED(maphist);
 1841 
 1842         UVMHIST_LOG(maphist,"(map=0x%x, start=0x%x, end=0x%x)",
 1843             map, start, end, 0);
 1844         VM_MAP_RANGE_CHECK(map, start, end);
 1845 
 1846         uvm_tree_sanity(map, "unmap_remove entry");
 1847 
 1848         /*
 1849          * find first entry
 1850          */
 1851 
 1852         if (uvm_map_lookup_entry(map, start, &first_entry) == TRUE) {
 1853                 /* clip and go... */
 1854                 entry = first_entry;
 1855                 UVM_MAP_CLIP_START(map, entry, start, umr);
 1856                 /* critical!  prevents stale hint */
 1857                 SAVE_HINT(map, entry, entry->prev);
 1858         } else {
 1859                 entry = first_entry->next;
 1860         }
 1861 
 1862         /*
 1863          * Save the free space hint
 1864          */
 1865 
 1866         if (map->first_free->start >= start)
 1867                 map->first_free = entry->prev;
 1868 
 1869         /*
 1870          * note: we now re-use first_entry for a different task.  we remove
 1871          * a number of map entries from the map and save them in a linked
 1872          * list headed by "first_entry".  once we remove them from the map
 1873          * the caller should unlock the map and drop the references to the
 1874          * backing objects [c.f. uvm_unmap_detach].  the object is to
 1875          * separate unmapping from reference dropping.  why?
 1876          *   [1] the map has to be locked for unmapping
 1877          *   [2] the map need not be locked for reference dropping
 1878          *   [3] dropping references may trigger pager I/O, and if we hit
 1879          *       a pager that does synchronous I/O we may have to wait for it.
 1880          *   [4] we would like all waiting for I/O to occur with maps unlocked
 1881          *       so that we don't block other threads.
 1882          */
 1883 
 1884         first_entry = NULL;
 1885         *entry_list = NULL;
 1886 
 1887         /*
 1888          * break up the area into map entry sized regions and unmap.  note
 1889          * that all mappings have to be removed before we can even consider
 1890          * dropping references to amaps or VM objects (otherwise we could end
 1891          * up with a mapping to a page on the free list which would be very bad)
 1892          */
 1893 
 1894         while ((entry != &map->header) && (entry->start < end)) {
 1895                 KASSERT((entry->flags & UVM_MAP_FIRST) == 0);
 1896 
 1897                 UVM_MAP_CLIP_END(map, entry, end, umr);
 1898                 next = entry->next;
 1899                 len = entry->end - entry->start;
 1900 
 1901                 /*
 1902                  * unwire before removing addresses from the pmap; otherwise
 1903                  * unwiring will put the entries back into the pmap (XXX).
 1904                  */
 1905 
 1906                 if (VM_MAPENT_ISWIRED(entry)) {
 1907                         uvm_map_entry_unwire(map, entry);
 1908                 }
 1909                 if ((map->flags & VM_MAP_PAGEABLE) == 0) {
 1910 
 1911                         /*
 1912                          * if the map is non-pageable, any pages mapped there
 1913                          * must be wired and entered with pmap_kenter_pa(),
 1914                          * and we should free any such pages immediately.
 1915                          * this is mostly used for kmem_map and mb_map.
 1916                          */
 1917 
 1918                         if ((entry->flags & UVM_MAP_KMAPENT) == 0) {
 1919                                 uvm_km_pgremove_intrsafe(entry->start,
 1920                                     entry->end);
 1921                                 pmap_kremove(entry->start, len);
 1922                         }
 1923                 } else if (UVM_ET_ISOBJ(entry) &&
 1924                            UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
 1925                         KASSERT(vm_map_pmap(map) == pmap_kernel());
 1926 
 1927                         /*
 1928                          * note: kernel object mappings are currently used in
 1929                          * two ways:
 1930                          *  [1] "normal" mappings of pages in the kernel object
 1931                          *  [2] uvm_km_valloc'd allocations in which we
 1932                          *      pmap_enter in some non-kernel-object page
 1933                          *      (e.g. vmapbuf).
 1934                          *
 1935                          * for case [1], we need to remove the mapping from
 1936                          * the pmap and then remove the page from the kernel
 1937                          * object (because, once pages in a kernel object are
 1938                          * unmapped they are no longer needed, unlike, say,
 1939                          * a vnode where you might want the data to persist
 1940                          * until flushed out of a queue).
 1941                          *
 1942                          * for case [2], we need to remove the mapping from
 1943                          * the pmap.  there shouldn't be any pages at the
 1944                          * specified offset in the kernel object [but it
 1945                          * doesn't hurt to call uvm_km_pgremove just to be
 1946                          * safe?]
 1947                          *
 1948                          * uvm_km_pgremove currently does the following:
 1949                          *   for pages in the kernel object in range:
 1950                          *     - drops the swap slot
 1951                          *     - uvm_pagefree the page
 1952                          */
 1953 
 1954                         /*
 1955                          * remove mappings from pmap and drop the pages
 1956                          * from the object.  offsets are always relative
 1957                          * to vm_map_min(kernel_map).
 1958                          */
 1959 
 1960                         pmap_remove(pmap_kernel(), entry->start,
 1961                             entry->start + len);
 1962                         uvm_km_pgremove(entry->object.uvm_obj,
 1963                             entry->start - vm_map_min(kernel_map),
 1964                             entry->end - vm_map_min(kernel_map));
 1965 
 1966                         /*
 1967                          * null out kernel_object reference, we've just
 1968                          * dropped it
 1969                          */
 1970 
 1971                         entry->etype &= ~UVM_ET_OBJ;
 1972                         entry->object.uvm_obj = NULL;
 1973                 } else if (UVM_ET_ISOBJ(entry) || entry->aref.ar_amap) {
 1974 
 1975                         /*
 1976                          * remove mappings the standard way.
 1977                          */
 1978 
 1979                         pmap_remove(map->pmap, entry->start, entry->end);
 1980                 }
 1981 
 1982 #if defined(DEBUG)
 1983                 if ((entry->flags & UVM_MAP_KMAPENT) == 0) {
 1984 
 1985                         /*
 1986                          * check if there's remaining mapping,
 1987                          * which is a bug in caller.
 1988                          */
 1989 
 1990                         vaddr_t va;
 1991                         for (va = entry->start; va < entry->end;
 1992                             va += PAGE_SIZE) {
 1993                                 if (pmap_extract(vm_map_pmap(map), va, NULL)) {
 1994                                         panic("uvm_unmap_remove: has mapping");
 1995                                 }
 1996                         }
 1997                 }
 1998 #endif /* defined(DEBUG) */
 1999 
 2000                 /*
 2001                  * remove entry from map and put it on our list of entries
 2002                  * that we've nuked.  then go to next entry.
 2003                  */
 2004 
 2005                 UVMHIST_LOG(maphist, "  removed map entry 0x%x", entry, 0, 0,0);
 2006 
 2007                 /* critical!  prevents stale hint */
 2008                 SAVE_HINT(map, entry, entry->prev);
 2009 
 2010                 uvm_map_entry_unlink(map, entry);
 2011                 KASSERT(map->size >= len);
 2012                 map->size -= len;
 2013                 entry->prev = NULL;
 2014                 entry->next = first_entry;
 2015                 first_entry = entry;
 2016                 entry = next;
 2017         }
 2018         if ((map->flags & VM_MAP_DYING) == 0) {
 2019                 pmap_update(vm_map_pmap(map));
 2020         }
 2021 
 2022         uvm_tree_sanity(map, "unmap_remove leave");
 2023 
 2024         /*
 2025          * now we've cleaned up the map and are ready for the caller to drop
 2026          * references to the mapped objects.
 2027          */
 2028 
 2029         *entry_list = first_entry;
 2030         UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
 2031 
 2032         simple_lock(&map->flags_lock);
 2033         if (map->flags & VM_MAP_WANTVA) {
 2034                 map->flags &= ~VM_MAP_WANTVA;
 2035                 wakeup(&map->header);
 2036         }
 2037         simple_unlock(&map->flags_lock);
 2038 }
 2039 
 2040 /*
 2041  * uvm_unmap_detach: drop references in a chain of map entries
 2042  *
 2043  * => we will free the map entries as we traverse the list.
 2044  */
 2045 
 2046 void
 2047 uvm_unmap_detach(struct vm_map_entry *first_entry, int flags)
 2048 {
 2049         struct vm_map_entry *next_entry;
 2050         UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist);
 2051 
 2052         while (first_entry) {
 2053                 KASSERT(!VM_MAPENT_ISWIRED(first_entry));
 2054                 UVMHIST_LOG(maphist,
 2055                     "  detach 0x%x: amap=0x%x, obj=0x%x, submap?=%d",
 2056                     first_entry, first_entry->aref.ar_amap,
 2057                     first_entry->object.uvm_obj,
 2058                     UVM_ET_ISSUBMAP(first_entry));
 2059 
 2060                 /*
 2061                  * drop reference to amap, if we've got one
 2062                  */
 2063 
 2064                 if (first_entry->aref.ar_amap)
 2065                         uvm_map_unreference_amap(first_entry, flags);
 2066 
 2067                 /*
 2068                  * drop reference to our backing object, if we've got one
 2069                  */
 2070 
 2071                 KASSERT(!UVM_ET_ISSUBMAP(first_entry));
 2072                 if (UVM_ET_ISOBJ(first_entry) &&
 2073                     first_entry->object.uvm_obj->pgops->pgo_detach) {
 2074                         (*first_entry->object.uvm_obj->pgops->pgo_detach)
 2075                                 (first_entry->object.uvm_obj);
 2076                 }
 2077                 next_entry = first_entry->next;
 2078                 uvm_mapent_free(first_entry);
 2079                 first_entry = next_entry;
 2080         }
 2081         UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
 2082 }
 2083 
 2084 /*
 2085  *   E X T R A C T I O N   F U N C T I O N S
 2086  */
 2087 
 2088 /*
 2089  * uvm_map_reserve: reserve space in a vm_map for future use.
 2090  *
 2091  * => we reserve space in a map by putting a dummy map entry in the
 2092  *    map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE)
 2093  * => map should be unlocked (we will write lock it)
 2094  * => we return true if we were able to reserve space
 2095  * => XXXCDC: should be inline?
 2096  */
 2097 
 2098 int
 2099 uvm_map_reserve(struct vm_map *map, vsize_t size,
 2100     vaddr_t offset      /* hint for pmap_prefer */,
 2101     vsize_t align       /* alignment hint */,
 2102     vaddr_t *raddr      /* IN:hint, OUT: reserved VA */)
 2103 {
 2104         UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist);
 2105 
 2106         UVMHIST_LOG(maphist, "(map=0x%x, size=0x%x, offset=0x%x,addr=0x%x)",
 2107             map,size,offset,raddr);
 2108 
 2109         size = round_page(size);
 2110         if (*raddr < vm_map_min(map))
 2111                 *raddr = vm_map_min(map);               /* hint */
 2112 
 2113         /*
 2114          * reserve some virtual space.
 2115          */
 2116 
 2117         if (uvm_map(map, raddr, size, NULL, offset, 0,
 2118             UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
 2119             UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != 0) {
 2120             UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0);
 2121                 return (FALSE);
 2122         }
 2123 
 2124         UVMHIST_LOG(maphist, "<- done (*raddr=0x%x)", *raddr,0,0,0);
 2125         return (TRUE);
 2126 }
 2127 
 2128 /*
 2129  * uvm_map_replace: replace a reserved (blank) area of memory with
 2130  * real mappings.
 2131  *
 2132  * => caller must WRITE-LOCK the map
 2133  * => we return TRUE if replacement was a success
 2134  * => we expect the newents chain to have nnewents entrys on it and
 2135  *    we expect newents->prev to point to the last entry on the list
 2136  * => note newents is allowed to be NULL
 2137  */
 2138 
 2139 int
 2140 uvm_map_replace(struct vm_map *map, vaddr_t start, vaddr_t end,
 2141     struct vm_map_entry *newents, int nnewents)
 2142 {
 2143         struct vm_map_entry *oldent, *last;
 2144 
 2145         uvm_tree_sanity(map, "map_replace entry");
 2146 
 2147         /*
 2148          * first find the blank map entry at the specified address
 2149          */
 2150 
 2151         if (!uvm_map_lookup_entry(map, start, &oldent)) {
 2152                 return (FALSE);
 2153         }
 2154 
 2155         /*
 2156          * check to make sure we have a proper blank entry
 2157          */
 2158 
 2159         if (oldent->start != start || oldent->end != end ||
 2160             oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) {
 2161                 return (FALSE);
 2162         }
 2163 
 2164 #ifdef DIAGNOSTIC
 2165 
 2166         /*
 2167          * sanity check the newents chain
 2168          */
 2169 
 2170         {
 2171                 struct vm_map_entry *tmpent = newents;
 2172                 int nent = 0;
 2173                 vaddr_t cur = start;
 2174 
 2175                 while (tmpent) {
 2176                         nent++;
 2177                         if (tmpent->start < cur)
 2178                                 panic("uvm_map_replace1");
 2179                         if (tmpent->start > tmpent->end || tmpent->end > end) {
 2180                 printf("tmpent->start=0x%lx, tmpent->end=0x%lx, end=0x%lx\n",
 2181                             tmpent->start, tmpent->end, end);
 2182                                 panic("uvm_map_replace2");
 2183                         }
 2184                         cur = tmpent->end;
 2185                         if (tmpent->next) {
 2186                                 if (tmpent->next->prev != tmpent)
 2187                                         panic("uvm_map_replace3");
 2188                         } else {
 2189                                 if (newents->prev != tmpent)
 2190                                         panic("uvm_map_replace4");
 2191                         }
 2192                         tmpent = tmpent->next;
 2193                 }
 2194                 if (nent != nnewents)
 2195                         panic("uvm_map_replace5");
 2196         }
 2197 #endif
 2198 
 2199         /*
 2200          * map entry is a valid blank!   replace it.   (this does all the
 2201          * work of map entry link/unlink...).
 2202          */
 2203 
 2204         if (newents) {
 2205                 last = newents->prev;
 2206 
 2207                 /* critical: flush stale hints out of map */
 2208                 SAVE_HINT(map, map->hint, newents);
 2209                 if (map->first_free == oldent)
 2210                         map->first_free = last;
 2211 
 2212                 last->next = oldent->next;
 2213                 last->next->prev = last;
 2214 
 2215                 /* Fix RB tree */
 2216                 uvm_rb_remove(map, oldent);
 2217 
 2218                 newents->prev = oldent->prev;
 2219                 newents->prev->next = newents;
 2220                 map->nentries = map->nentries + (nnewents - 1);
 2221 
 2222                 /* Fixup the RB tree */
 2223                 {
 2224                         int i;
 2225                         struct vm_map_entry *tmp;
 2226 
 2227                         tmp = newents;
 2228                         for (i = 0; i < nnewents && tmp; i++) {
 2229                                 uvm_rb_insert(map, tmp);
 2230                                 tmp = tmp->next;
 2231                         }
 2232                 }
 2233         } else {
 2234 
 2235                 /* critical: flush stale hints out of map */
 2236                 SAVE_HINT(map, map->hint, oldent->prev);
 2237                 if (map->first_free == oldent)
 2238                         map->first_free = oldent->prev;
 2239 
 2240                 /* NULL list of new entries: just remove the old one */
 2241                 uvm_map_entry_unlink(map, oldent);
 2242         }
 2243 
 2244         uvm_tree_sanity(map, "map_replace leave");
 2245 
 2246         /*
 2247          * now we can free the old blank entry, unlock the map and return.
 2248          */
 2249 
 2250         uvm_mapent_free(oldent);
 2251         return (TRUE);
 2252 }
 2253 
 2254 /*
 2255  * uvm_map_extract: extract a mapping from a map and put it somewhere
 2256  *      (maybe removing the old mapping)
 2257  *
 2258  * => maps should be unlocked (we will write lock them)
 2259  * => returns 0 on success, error code otherwise
 2260  * => start must be page aligned
 2261  * => len must be page sized
 2262  * => flags:
 2263  *      UVM_EXTRACT_REMOVE: remove mappings from srcmap
 2264  *      UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only)
 2265  *      UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs
 2266  *      UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
 2267  *    >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<<
 2268  *    >>>NOTE: QREF's must be unmapped via the QREF path, thus should only
 2269  *             be used from within the kernel in a kernel level map <<<
 2270  */
 2271 
 2272 int
 2273 uvm_map_extract(struct vm_map *srcmap, vaddr_t start, vsize_t len,
 2274     struct vm_map *dstmap, vaddr_t *dstaddrp, int flags)
 2275 {
 2276         vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge;
 2277         struct vm_map_entry *chain, *endchain, *entry, *orig_entry, *newentry,
 2278             *deadentry, *oldentry;
 2279         vsize_t elen;
 2280         int nchain, error, copy_ok;
 2281         UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist);
 2282 
 2283         UVMHIST_LOG(maphist,"(srcmap=0x%x,start=0x%x, len=0x%x", srcmap, start,
 2284             len,0);
 2285         UVMHIST_LOG(maphist," ...,dstmap=0x%x, flags=0x%x)", dstmap,flags,0,0);
 2286 
 2287         uvm_tree_sanity(srcmap, "map_extract src enter");
 2288         uvm_tree_sanity(dstmap, "map_extract dst enter");
 2289 
 2290         /*
 2291          * step 0: sanity check: start must be on a page boundary, length
 2292          * must be page sized.  can't ask for CONTIG/QREF if you asked for
 2293          * REMOVE.
 2294          */
 2295 
 2296         KASSERT((start & PAGE_MASK) == 0 && (len & PAGE_MASK) == 0);
 2297         KASSERT((flags & UVM_EXTRACT_REMOVE) == 0 ||
 2298                 (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) == 0);
 2299 
 2300         /*
 2301          * step 1: reserve space in the target map for the extracted area
 2302          */
 2303 
 2304         dstaddr = vm_map_min(dstmap);
 2305         if (uvm_map_reserve(dstmap, len, start, 0, &dstaddr) == FALSE)
 2306                 return (ENOMEM);
 2307         *dstaddrp = dstaddr;    /* pass address back to caller */
 2308         UVMHIST_LOG(maphist, "  dstaddr=0x%x", dstaddr,0,0,0);
 2309 
 2310         /*
 2311          * step 2: setup for the extraction process loop by init'ing the
 2312          * map entry chain, locking src map, and looking up the first useful
 2313          * entry in the map.
 2314          */
 2315 
 2316         end = start + len;
 2317         newend = dstaddr + len;
 2318         chain = endchain = NULL;
 2319         nchain = 0;
 2320         vm_map_lock(srcmap);
 2321 
 2322         if (uvm_map_lookup_entry(srcmap, start, &entry)) {
 2323 
 2324                 /* "start" is within an entry */
 2325                 if (flags & UVM_EXTRACT_QREF) {
 2326 
 2327                         /*
 2328                          * for quick references we don't clip the entry, so
 2329                          * the entry may map space "before" the starting
 2330                          * virtual address... this is the "fudge" factor
 2331                          * (which can be non-zero only the first time
 2332                          * through the "while" loop in step 3).
 2333                          */
 2334 
 2335                         fudge = start - entry->start;
 2336                 } else {
 2337 
 2338                         /*
 2339                          * normal reference: we clip the map to fit (thus
 2340                          * fudge is zero)
 2341                          */
 2342 
 2343                         UVM_MAP_CLIP_START(srcmap, entry, start, NULL);
 2344                         SAVE_HINT(srcmap, srcmap->hint, entry->prev);
 2345                         fudge = 0;
 2346                 }
 2347         } else {
 2348 
 2349                 /* "start" is not within an entry ... skip to next entry */
 2350                 if (flags & UVM_EXTRACT_CONTIG) {
 2351                         error = EINVAL;
 2352                         goto bad;    /* definite hole here ... */
 2353                 }
 2354 
 2355                 entry = entry->next;
 2356                 fudge = 0;
 2357         }
 2358 
 2359         /* save values from srcmap for step 6 */
 2360         orig_entry = entry;
 2361         orig_fudge = fudge;
 2362 
 2363         /*
 2364          * step 3: now start looping through the map entries, extracting
 2365          * as we go.
 2366          */
 2367 
 2368         while (entry->start < end && entry != &srcmap->header) {
 2369 
 2370                 /* if we are not doing a quick reference, clip it */
 2371                 if ((flags & UVM_EXTRACT_QREF) == 0)
 2372                         UVM_MAP_CLIP_END(srcmap, entry, end, NULL);
 2373 
 2374                 /* clear needs_copy (allow chunking) */
 2375                 if (UVM_ET_ISNEEDSCOPY(entry)) {
 2376                         amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end);
 2377                         if (UVM_ET_ISNEEDSCOPY(entry)) {  /* failed? */
 2378                                 error = ENOMEM;
 2379                                 goto bad;
 2380                         }
 2381 
 2382                         /* amap_copy could clip (during chunk)!  update fudge */
 2383                         if (fudge) {
 2384                                 fudge = start - entry->start;
 2385                                 orig_fudge = fudge;
 2386                         }
 2387                 }
 2388 
 2389                 /* calculate the offset of this from "start" */
 2390                 oldoffset = (entry->start + fudge) - start;
 2391 
 2392                 /* allocate a new map entry */
 2393                 newentry = uvm_mapent_alloc(dstmap, 0);
 2394                 if (newentry == NULL) {
 2395                         error = ENOMEM;
 2396                         goto bad;
 2397                 }
 2398 
 2399                 /* set up new map entry */
 2400                 newentry->next = NULL;
 2401                 newentry->prev = endchain;
 2402                 newentry->start = dstaddr + oldoffset;
 2403                 newentry->end =
 2404                     newentry->start + (entry->end - (entry->start + fudge));
 2405                 if (newentry->end > newend || newentry->end < newentry->start)
 2406                         newentry->end = newend;
 2407                 newentry->object.uvm_obj = entry->object.uvm_obj;
 2408                 if (newentry->object.uvm_obj) {
 2409                         if (newentry->object.uvm_obj->pgops->pgo_reference)
 2410                                 newentry->object.uvm_obj->pgops->
 2411                                     pgo_reference(newentry->object.uvm_obj);
 2412                                 newentry->offset = entry->offset + fudge;
 2413                 } else {
 2414                         newentry->offset = 0;
 2415                 }
 2416                 newentry->etype = entry->etype;
 2417                 newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ?
 2418                         entry->max_protection : entry->protection;
 2419                 newentry->max_protection = entry->max_protection;
 2420                 newentry->inheritance = entry->inheritance;
 2421                 newentry->wired_count = 0;
 2422                 newentry->aref.ar_amap = entry->aref.ar_amap;
 2423                 if (newentry->aref.ar_amap) {
 2424                         newentry->aref.ar_pageoff =
 2425                             entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT);
 2426                         uvm_map_reference_amap(newentry, AMAP_SHARED |
 2427                             ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0));
 2428                 } else {
 2429                         newentry->aref.ar_pageoff = 0;
 2430                 }
 2431                 newentry->advice = entry->advice;
 2432 
 2433                 /* now link it on the chain */
 2434                 nchain++;
 2435                 if (endchain == NULL) {
 2436                         chain = endchain = newentry;
 2437                 } else {
 2438                         endchain->next = newentry;
 2439                         endchain = newentry;
 2440                 }
 2441 
 2442                 /* end of 'while' loop! */
 2443                 if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end &&
 2444                     (entry->next == &srcmap->header ||
 2445                     entry->next->start != entry->end)) {
 2446                         error = EINVAL;
 2447                         goto bad;
 2448                 }
 2449                 entry = entry->next;
 2450                 fudge = 0;
 2451         }
 2452 
 2453         /*
 2454          * step 4: close off chain (in format expected by uvm_map_replace)
 2455          */
 2456 
 2457         if (chain)
 2458                 chain->prev = endchain;
 2459 
 2460         /*
 2461          * step 5: attempt to lock the dest map so we can pmap_copy.
 2462          * note usage of copy_ok:
 2463          *   1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5)
 2464          *   0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7
 2465          */
 2466 
 2467         if (srcmap == dstmap || vm_map_lock_try(dstmap) == TRUE) {
 2468                 copy_ok = 1;
 2469                 if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
 2470                     nchain)) {
 2471                         if (srcmap != dstmap)
 2472                                 vm_map_unlock(dstmap);
 2473                         error = EIO;
 2474                         goto bad;
 2475                 }
 2476         } else {
 2477                 copy_ok = 0;
 2478                 /* replace defered until step 7 */
 2479         }
 2480 
 2481         /*
 2482          * step 6: traverse the srcmap a second time to do the following:
 2483          *  - if we got a lock on the dstmap do pmap_copy
 2484          *  - if UVM_EXTRACT_REMOVE remove the entries
 2485          * we make use of orig_entry and orig_fudge (saved in step 2)
 2486          */
 2487 
 2488         if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) {
 2489 
 2490                 /* purge possible stale hints from srcmap */
 2491                 if (flags & UVM_EXTRACT_REMOVE) {
 2492                         SAVE_HINT(srcmap, srcmap->hint, orig_entry->prev);
 2493                         if (srcmap->first_free->start >= start)
 2494                                 srcmap->first_free = orig_entry->prev;
 2495                 }
 2496 
 2497                 entry = orig_entry;
 2498                 fudge = orig_fudge;
 2499                 deadentry = NULL;       /* for UVM_EXTRACT_REMOVE */
 2500 
 2501                 while (entry->start < end && entry != &srcmap->header) {
 2502                         if (copy_ok) {
 2503                                 oldoffset = (entry->start + fudge) - start;
 2504                                 elen = MIN(end, entry->end) -
 2505                                     (entry->start + fudge);
 2506                                 pmap_copy(dstmap->pmap, srcmap->pmap,
 2507                                     dstaddr + oldoffset, elen,
 2508                                     entry->start + fudge);
 2509                         }
 2510 
 2511                         /* we advance "entry" in the following if statement */
 2512                         if (flags & UVM_EXTRACT_REMOVE) {
 2513                                 pmap_remove(srcmap->pmap, entry->start,
 2514                                                 entry->end);
 2515                                 oldentry = entry;       /* save entry */
 2516                                 entry = entry->next;    /* advance */
 2517                                 uvm_map_entry_unlink(srcmap, oldentry);
 2518                                                         /* add to dead list */
 2519                                 oldentry->next = deadentry;
 2520                                 deadentry = oldentry;
 2521                         } else {
 2522                                 entry = entry->next;            /* advance */
 2523                         }
 2524 
 2525                         /* end of 'while' loop */
 2526                         fudge = 0;
 2527                 }
 2528                 pmap_update(srcmap->pmap);
 2529 
 2530                 /*
 2531                  * unlock dstmap.  we will dispose of deadentry in
 2532                  * step 7 if needed
 2533                  */
 2534 
 2535                 if (copy_ok && srcmap != dstmap)
 2536                         vm_map_unlock(dstmap);
 2537 
 2538         } else {
 2539                 deadentry = NULL;
 2540         }
 2541 
 2542         /*
 2543          * step 7: we are done with the source map, unlock.   if copy_ok
 2544          * is 0 then we have not replaced the dummy mapping in dstmap yet
 2545          * and we need to do so now.
 2546          */
 2547 
 2548         vm_map_unlock(srcmap);
 2549         if ((flags & UVM_EXTRACT_REMOVE) && deadentry)
 2550                 uvm_unmap_detach(deadentry, 0);   /* dispose of old entries */
 2551 
 2552         /* now do the replacement if we didn't do it in step 5 */
 2553         if (copy_ok == 0) {
 2554                 vm_map_lock(dstmap);
 2555                 error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
 2556                     nchain);
 2557                 vm_map_unlock(dstmap);
 2558 
 2559                 if (error == FALSE) {
 2560                         error = EIO;
 2561                         goto bad2;
 2562                 }
 2563         }
 2564 
 2565         uvm_tree_sanity(srcmap, "map_extract src leave");
 2566         uvm_tree_sanity(dstmap, "map_extract dst leave");
 2567 
 2568         return (0);
 2569 
 2570         /*
 2571          * bad: failure recovery
 2572          */
 2573 bad:
 2574         vm_map_unlock(srcmap);
 2575 bad2:                   /* src already unlocked */
 2576         if (chain)
 2577                 uvm_unmap_detach(chain,
 2578                     (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0);
 2579 
 2580         uvm_tree_sanity(srcmap, "map_extract src err leave");
 2581         uvm_tree_sanity(dstmap, "map_extract dst err leave");
 2582 
 2583         uvm_unmap(dstmap, dstaddr, dstaddr+len);   /* ??? */
 2584         return (error);
 2585 }
 2586 
 2587 /* end of extraction functions */
 2588 
 2589 /*
 2590  * uvm_map_submap: punch down part of a map into a submap
 2591  *
 2592  * => only the kernel_map is allowed to be submapped
 2593  * => the purpose of submapping is to break up the locking granularity
 2594  *      of a larger map
 2595  * => the range specified must have been mapped previously with a uvm_map()
 2596  *      call [with uobj==NULL] to create a blank map entry in the main map.
 2597  *      [And it had better still be blank!]
 2598  * => maps which contain submaps should never be copied or forked.
 2599  * => to remove a submap, use uvm_unmap() on the main map
 2600  *      and then uvm_map_deallocate() the submap.
 2601  * => main map must be unlocked.
 2602  * => submap must have been init'd and have a zero reference count.
 2603  *      [need not be locked as we don't actually reference it]
 2604  */
 2605 
 2606 int
 2607 uvm_map_submap(struct vm_map *map, vaddr_t start, vaddr_t end,
 2608     struct vm_map *submap)
 2609 {
 2610         struct vm_map_entry *entry;
 2611         struct uvm_mapent_reservation umr;
 2612         int error;
 2613 
 2614         uvm_mapent_reserve(map, &umr, 2, 0);
 2615 
 2616         vm_map_lock(map);
 2617         VM_MAP_RANGE_CHECK(map, start, end);
 2618 
 2619         if (uvm_map_lookup_entry(map, start, &entry)) {
 2620                 UVM_MAP_CLIP_START(map, entry, start, &umr);
 2621                 UVM_MAP_CLIP_END(map, entry, end, &umr);        /* to be safe */
 2622         } else {
 2623                 entry = NULL;
 2624         }
 2625 
 2626         if (entry != NULL &&
 2627             entry->start == start && entry->end == end &&
 2628             entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
 2629             !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
 2630                 entry->etype |= UVM_ET_SUBMAP;
 2631                 entry->object.sub_map = submap;
 2632                 entry->offset = 0;
 2633                 uvm_map_reference(submap);
 2634                 error = 0;
 2635         } else {
 2636                 error = EINVAL;
 2637         }
 2638         vm_map_unlock(map);
 2639 
 2640         uvm_mapent_unreserve(map, &umr);
 2641 
 2642         return error;
 2643 }
 2644 
 2645 /*
 2646  * uvm_map_setup_kernel: init in-kernel map
 2647  *
 2648  * => map must not be in service yet.
 2649  */
 2650 
 2651 void
 2652 uvm_map_setup_kernel(struct vm_map_kernel *map,
 2653     vaddr_t min, vaddr_t max, int flags)
 2654 {
 2655 
 2656         uvm_map_setup(&map->vmk_map, min, max, flags);
 2657 
 2658         LIST_INIT(&map->vmk_kentry_free);
 2659         map->vmk_merged_entries = NULL;
 2660 }
 2661 
 2662 
 2663 /*
 2664  * uvm_map_protect: change map protection
 2665  *
 2666  * => set_max means set max_protection.
 2667  * => map must be unlocked.
 2668  */
 2669 
 2670 #define MASK(entry)     (UVM_ET_ISCOPYONWRITE(entry) ? \
 2671                          ~VM_PROT_WRITE : VM_PROT_ALL)
 2672 
 2673 int
 2674 uvm_map_protect(struct vm_map *map, vaddr_t start, vaddr_t end,
 2675     vm_prot_t new_prot, boolean_t set_max)
 2676 {
 2677         struct vm_map_entry *current, *entry;
 2678         int error = 0;
 2679         UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist);
 2680         UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_prot=0x%x)",
 2681                     map, start, end, new_prot);
 2682 
 2683         vm_map_lock(map);
 2684         VM_MAP_RANGE_CHECK(map, start, end);
 2685         if (uvm_map_lookup_entry(map, start, &entry)) {
 2686                 UVM_MAP_CLIP_START(map, entry, start, NULL);
 2687         } else {
 2688                 entry = entry->next;
 2689         }
 2690 
 2691         /*
 2692          * make a first pass to check for protection violations.
 2693          */
 2694 
 2695         current = entry;
 2696         while ((current != &map->header) && (current->start < end)) {
 2697                 if (UVM_ET_ISSUBMAP(current)) {
 2698                         error = EINVAL;
 2699                         goto out;
 2700                 }
 2701                 if ((new_prot & current->max_protection) != new_prot) {
 2702                         error = EACCES;
 2703                         goto out;
 2704                 }
 2705                 /*
 2706                  * Don't allow VM_PROT_EXECUTE to be set on entries that
 2707                  * point to vnodes that are associated with a NOEXEC file
 2708                  * system.
 2709                  */
 2710                 if (UVM_ET_ISOBJ(current) &&
 2711                     UVM_OBJ_IS_VNODE(current->object.uvm_obj)) {
 2712                         struct vnode *vp =
 2713                             (struct vnode *) current->object.uvm_obj;
 2714 
 2715                         if ((new_prot & VM_PROT_EXECUTE) != 0 &&
 2716                             (vp->v_mount->mnt_flag & MNT_NOEXEC) != 0) {
 2717                                 error = EACCES;
 2718                                 goto out;
 2719                         }
 2720                 }
 2721                 current = current->next;
 2722         }
 2723 
 2724         /* go back and fix up protections (no need to clip this time). */
 2725 
 2726         current = entry;
 2727         while ((current != &map->header) && (current->start < end)) {
 2728                 vm_prot_t old_prot;
 2729 
 2730                 UVM_MAP_CLIP_END(map, current, end, NULL);
 2731                 old_prot = current->protection;
 2732                 if (set_max)
 2733                         current->protection =
 2734                             (current->max_protection = new_prot) & old_prot;
 2735                 else
 2736                         current->protection = new_prot;
 2737 
 2738                 /*
 2739                  * update physical map if necessary.  worry about copy-on-write
 2740                  * here -- CHECK THIS XXX
 2741                  */
 2742 
 2743                 if (current->protection != old_prot) {
 2744                         /* update pmap! */
 2745                         pmap_protect(map->pmap, current->start, current->end,
 2746                             current->protection & MASK(entry));
 2747 
 2748                         /*
 2749                          * If this entry points at a vnode, and the
 2750                          * protection includes VM_PROT_EXECUTE, mark
 2751                          * the vnode as VEXECMAP.
 2752                          */
 2753                         if (UVM_ET_ISOBJ(current)) {
 2754                                 struct uvm_object *uobj =
 2755                                     current->object.uvm_obj;
 2756 
 2757                                 if (UVM_OBJ_IS_VNODE(uobj) &&
 2758                                     (current->protection & VM_PROT_EXECUTE))
 2759                                         vn_markexec((struct vnode *) uobj);
 2760                         }
 2761                 }
 2762 
 2763                 /*
 2764                  * If the map is configured to lock any future mappings,
 2765                  * wire this entry now if the old protection was VM_PROT_NONE
 2766                  * and the new protection is not VM_PROT_NONE.
 2767                  */
 2768 
 2769                 if ((map->flags & VM_MAP_WIREFUTURE) != 0 &&
 2770                     VM_MAPENT_ISWIRED(entry) == 0 &&
 2771                     old_prot == VM_PROT_NONE &&
 2772                     new_prot != VM_PROT_NONE) {
 2773                         if (uvm_map_pageable(map, entry->start,
 2774                             entry->end, FALSE,
 2775                             UVM_LK_ENTER|UVM_LK_EXIT) != 0) {
 2776 
 2777                                 /*
 2778                                  * If locking the entry fails, remember the
 2779                                  * error if it's the first one.  Note we
 2780                                  * still continue setting the protection in
 2781                                  * the map, but will return the error
 2782                                  * condition regardless.
 2783                                  *
 2784                                  * XXX Ignore what the actual error is,
 2785                                  * XXX just call it a resource shortage
 2786                                  * XXX so that it doesn't get confused
 2787                                  * XXX what uvm_map_protect() itself would
 2788                                  * XXX normally return.
 2789                                  */
 2790 
 2791                                 error = ENOMEM;
 2792                         }
 2793                 }
 2794                 current = current->next;
 2795         }
 2796         pmap_update(map->pmap);
 2797 
 2798  out:
 2799         vm_map_unlock(map);
 2800 
 2801         UVMHIST_LOG(maphist, "<- done, error=%d",error,0,0,0);
 2802         return error;
 2803 }
 2804 
 2805 #undef  MASK
 2806 
 2807 /*
 2808  * uvm_map_inherit: set inheritance code for range of addrs in map.
 2809  *
 2810  * => map must be unlocked
 2811  * => note that the inherit code is used during a "fork".  see fork
 2812  *      code for details.
 2813  */
 2814 
 2815 int
 2816 uvm_map_inherit(struct vm_map *map, vaddr_t start, vaddr_t end,
 2817     vm_inherit_t new_inheritance)
 2818 {
 2819         struct vm_map_entry *entry, *temp_entry;
 2820         UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist);
 2821         UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_inh=0x%x)",
 2822             map, start, end, new_inheritance);
 2823 
 2824         switch (new_inheritance) {
 2825         case MAP_INHERIT_NONE:
 2826         case MAP_INHERIT_COPY:
 2827         case MAP_INHERIT_SHARE:
 2828                 break;
 2829         default:
 2830                 UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
 2831                 return EINVAL;
 2832         }
 2833 
 2834         vm_map_lock(map);
 2835         VM_MAP_RANGE_CHECK(map, start, end);
 2836         if (uvm_map_lookup_entry(map, start, &temp_entry)) {
 2837                 entry = temp_entry;
 2838                 UVM_MAP_CLIP_START(map, entry, start, NULL);
 2839         }  else {
 2840                 entry = temp_entry->next;
 2841         }
 2842         while ((entry != &map->header) && (entry->start < end)) {
 2843                 UVM_MAP_CLIP_END(map, entry, end, NULL);
 2844                 entry->inheritance = new_inheritance;
 2845                 entry = entry->next;
 2846         }
 2847         vm_map_unlock(map);
 2848         UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
 2849         return 0;
 2850 }
 2851 
 2852 /*
 2853  * uvm_map_advice: set advice code for range of addrs in map.
 2854  *
 2855  * => map must be unlocked
 2856  */
 2857 
 2858 int
 2859 uvm_map_advice(struct vm_map *map, vaddr_t start, vaddr_t end, int new_advice)
 2860 {
 2861         struct vm_map_entry *entry, *temp_entry;
 2862         UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist);
 2863         UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_adv=0x%x)",
 2864             map, start, end, new_advice);
 2865 
 2866         vm_map_lock(map);
 2867         VM_MAP_RANGE_CHECK(map, start, end);
 2868         if (uvm_map_lookup_entry(map, start, &temp_entry)) {
 2869                 entry = temp_entry;
 2870                 UVM_MAP_CLIP_START(map, entry, start, NULL);
 2871         } else {
 2872                 entry = temp_entry->next;
 2873         }
 2874 
 2875         /*
 2876          * XXXJRT: disallow holes?
 2877          */
 2878 
 2879         while ((entry != &map->header) && (entry->start < end)) {
 2880                 UVM_MAP_CLIP_END(map, entry, end, NULL);
 2881 
 2882                 switch (new_advice) {
 2883                 case MADV_NORMAL:
 2884                 case MADV_RANDOM:
 2885                 case MADV_SEQUENTIAL:
 2886                         /* nothing special here */
 2887                         break;
 2888 
 2889                 default:
 2890                         vm_map_unlock(map);
 2891                         UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
 2892                         return EINVAL;
 2893                 }
 2894                 entry->advice = new_advice;
 2895                 entry = entry->next;
 2896         }
 2897 
 2898         vm_map_unlock(map);
 2899         UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
 2900         return 0;
 2901 }
 2902 
 2903 /*
 2904  * uvm_map_pageable: sets the pageability of a range in a map.
 2905  *
 2906  * => wires map entries.  should not be used for transient page locking.
 2907  *      for that, use uvm_fault_wire()/uvm_fault_unwire() (see uvm_vslock()).
 2908  * => regions specified as not pageable require lock-down (wired) memory
 2909  *      and page tables.
 2910  * => map must never be read-locked
 2911  * => if islocked is TRUE, map is already write-locked
 2912  * => we always unlock the map, since we must downgrade to a read-lock
 2913  *      to call uvm_fault_wire()
 2914  * => XXXCDC: check this and try and clean it up.
 2915  */
 2916 
 2917 int
 2918 uvm_map_pageable(struct vm_map *map, vaddr_t start, vaddr_t end,
 2919     boolean_t new_pageable, int lockflags)
 2920 {
 2921         struct vm_map_entry *entry, *start_entry, *failed_entry;
 2922         int rv;
 2923 #ifdef DIAGNOSTIC
 2924         u_int timestamp_save;
 2925 #endif
 2926         UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist);
 2927         UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_pageable=0x%x)",
 2928                     map, start, end, new_pageable);
 2929         KASSERT(map->flags & VM_MAP_PAGEABLE);
 2930 
 2931         if ((lockflags & UVM_LK_ENTER) == 0)
 2932                 vm_map_lock(map);
 2933         VM_MAP_RANGE_CHECK(map, start, end);
 2934 
 2935         /*
 2936          * only one pageability change may take place at one time, since
 2937          * uvm_fault_wire assumes it will be called only once for each
 2938          * wiring/unwiring.  therefore, we have to make sure we're actually
 2939          * changing the pageability for the entire region.  we do so before
 2940          * making any changes.
 2941          */
 2942 
 2943         if (uvm_map_lookup_entry(map, start, &start_entry) == FALSE) {
 2944                 if ((lockflags & UVM_LK_EXIT) == 0)
 2945                         vm_map_unlock(map);
 2946 
 2947                 UVMHIST_LOG(maphist,"<- done (fault)",0,0,0,0);
 2948                 return EFAULT;
 2949         }
 2950         entry = start_entry;
 2951 
 2952         /*
 2953          * handle wiring and unwiring separately.
 2954          */
 2955 
 2956         if (new_pageable) {             /* unwire */
 2957                 UVM_MAP_CLIP_START(map, entry, start, NULL);
 2958 
 2959                 /*
 2960                  * unwiring.  first ensure that the range to be unwired is
 2961                  * really wired down and that there are no holes.
 2962                  */
 2963 
 2964                 while ((entry != &map->header) && (entry->start < end)) {
 2965                         if (entry->wired_count == 0 ||
 2966                             (entry->end < end &&
 2967                              (entry->next == &map->header ||
 2968                               entry->next->start > entry->end))) {
 2969                                 if ((lockflags & UVM_LK_EXIT) == 0)
 2970                                         vm_map_unlock(map);
 2971                                 UVMHIST_LOG(maphist, "<- done (INVAL)",0,0,0,0);
 2972                                 return EINVAL;
 2973                         }
 2974                         entry = entry->next;
 2975                 }
 2976 
 2977                 /*
 2978                  * POSIX 1003.1b - a single munlock call unlocks a region,
 2979                  * regardless of the number of mlock calls made on that
 2980                  * region.
 2981                  */
 2982 
 2983                 entry = start_entry;
 2984                 while ((entry != &map->header) && (entry->start < end)) {
 2985                         UVM_MAP_CLIP_END(map, entry, end, NULL);
 2986                         if (VM_MAPENT_ISWIRED(entry))
 2987                                 uvm_map_entry_unwire(map, entry);
 2988                         entry = entry->next;
 2989                 }
 2990                 if ((lockflags & UVM_LK_EXIT) == 0)
 2991                         vm_map_unlock(map);
 2992                 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0);
 2993                 return 0;
 2994         }
 2995 
 2996         /*
 2997          * wire case: in two passes [XXXCDC: ugly block of code here]
 2998          *
 2999          * 1: holding the write lock, we create any anonymous maps that need
 3000          *    to be created.  then we clip each map entry to the region to
 3001          *    be wired and increment its wiring count.
 3002          *
 3003          * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
 3004          *    in the pages for any newly wired area (wired_count == 1).
 3005          *
 3006          *    downgrading to a read lock for uvm_fault_wire avoids a possible
 3007          *    deadlock with another thread that may have faulted on one of
 3008          *    the pages to be wired (it would mark the page busy, blocking
 3009          *    us, then in turn block on the map lock that we hold).  because
 3010          *    of problems in the recursive lock package, we cannot upgrade
 3011          *    to a write lock in vm_map_lookup.  thus, any actions that
 3012          *    require the write lock must be done beforehand.  because we
 3013          *    keep the read lock on the map, the copy-on-write status of the
 3014          *    entries we modify here cannot change.
 3015          */
 3016 
 3017         while ((entry != &map->header) && (entry->start < end)) {
 3018                 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
 3019 
 3020                         /*
 3021                          * perform actions of vm_map_lookup that need the
 3022                          * write lock on the map: create an anonymous map
 3023                          * for a copy-on-write region, or an anonymous map
 3024                          * for a zero-fill region.  (XXXCDC: submap case
 3025                          * ok?)
 3026                          */
 3027 
 3028                         if (!UVM_ET_ISSUBMAP(entry)) {  /* not submap */
 3029                                 if (UVM_ET_ISNEEDSCOPY(entry) &&
 3030                                     ((entry->max_protection & VM_PROT_WRITE) ||
 3031                                      (entry->object.uvm_obj == NULL))) {
 3032                                         amap_copy(map, entry, M_WAITOK, TRUE,
 3033                                             start, end);
 3034                                         /* XXXCDC: wait OK? */
 3035                                 }
 3036                         }
 3037                 }
 3038                 UVM_MAP_CLIP_START(map, entry, start, NULL);
 3039                 UVM_MAP_CLIP_END(map, entry, end, NULL);
 3040                 entry->wired_count++;
 3041 
 3042                 /*
 3043                  * Check for holes
 3044                  */
 3045 
 3046                 if (entry->protection == VM_PROT_NONE ||
 3047                     (entry->end < end &&
 3048                      (entry->next == &map->header ||
 3049                       entry->next->start > entry->end))) {
 3050 
 3051                         /*
 3052                          * found one.  amap creation actions do not need to
 3053                          * be undone, but the wired counts need to be restored.
 3054                          */
 3055 
 3056                         while (entry != &map->header && entry->end > start) {
 3057                                 entry->wired_count--;
 3058                                 entry = entry->prev;
 3059                         }
 3060                         if ((lockflags & UVM_LK_EXIT) == 0)
 3061                                 vm_map_unlock(map);
 3062                         UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0);
 3063                         return EINVAL;
 3064                 }
 3065                 entry = entry->next;
 3066         }
 3067 
 3068         /*
 3069          * Pass 2.
 3070          */
 3071 
 3072 #ifdef DIAGNOSTIC
 3073         timestamp_save = map->timestamp;
 3074 #endif
 3075         vm_map_busy(map);
 3076         vm_map_downgrade(map);
 3077 
 3078         rv = 0;
 3079         entry = start_entry;
 3080         while (entry != &map->header && entry->start < end) {
 3081                 if (entry->wired_count == 1) {
 3082                         rv = uvm_fault_wire(map, entry->start, entry->end,
 3083                             VM_FAULT_WIREMAX, entry->max_protection);
 3084                         if (rv) {
 3085 
 3086                                 /*
 3087                                  * wiring failed.  break out of the loop.
 3088                                  * we'll clean up the map below, once we
 3089                                  * have a write lock again.
 3090                                  */
 3091 
 3092                                 break;
 3093                         }
 3094                 }
 3095                 entry = entry->next;
 3096         }
 3097 
 3098         if (rv) {       /* failed? */
 3099 
 3100                 /*
 3101                  * Get back to an exclusive (write) lock.
 3102                  */
 3103 
 3104                 vm_map_upgrade(map);
 3105                 vm_map_unbusy(map);
 3106 
 3107 #ifdef DIAGNOSTIC
 3108                 if (timestamp_save != map->timestamp)
 3109                         panic("uvm_map_pageable: stale map");
 3110 #endif
 3111 
 3112                 /*
 3113                  * first drop the wiring count on all the entries
 3114                  * which haven't actually been wired yet.
 3115                  */
 3116 
 3117                 failed_entry = entry;
 3118                 while (entry != &map->header && entry->start < end) {
 3119                         entry->wired_count--;
 3120                         entry = entry->next;
 3121                 }
 3122 
 3123                 /*
 3124                  * now, unwire all the entries that were successfully
 3125                  * wired above.
 3126                  */
 3127 
 3128                 entry = start_entry;
 3129                 while (entry != failed_entry) {
 3130                         entry->wired_count--;
 3131                         if (VM_MAPENT_ISWIRED(entry) == 0)
 3132                                 uvm_map_entry_unwire(map, entry);
 3133                         entry = entry->next;
 3134                 }
 3135                 if ((lockflags & UVM_LK_EXIT) == 0)
 3136                         vm_map_unlock(map);
 3137                 UVMHIST_LOG(maphist, "<- done (RV=%d)", rv,0,0,0);
 3138                 return (rv);
 3139         }
 3140 
 3141         /* We are holding a read lock here. */
 3142         if ((lockflags & UVM_LK_EXIT) == 0) {
 3143                 vm_map_unbusy(map);
 3144                 vm_map_unlock_read(map);
 3145         } else {
 3146 
 3147                 /*
 3148                  * Get back to an exclusive (write) lock.
 3149                  */
 3150 
 3151                 vm_map_upgrade(map);
 3152                 vm_map_unbusy(map);
 3153         }
 3154 
 3155         UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0);
 3156         return 0;
 3157 }
 3158 
 3159 /*
 3160  * uvm_map_pageable_all: special case of uvm_map_pageable - affects
 3161  * all mapped regions.
 3162  *
 3163  * => map must not be locked.
 3164  * => if no flags are specified, all regions are unwired.
 3165  * => XXXJRT: has some of the same problems as uvm_map_pageable() above.
 3166  */
 3167 
 3168 int
 3169 uvm_map_pageable_all(struct vm_map *map, int flags, vsize_t limit)
 3170 {
 3171         struct vm_map_entry *entry, *failed_entry;
 3172         vsize_t size;
 3173         int rv;
 3174 #ifdef DIAGNOSTIC
 3175         u_int timestamp_save;
 3176 #endif
 3177         UVMHIST_FUNC("uvm_map_pageable_all"); UVMHIST_CALLED(maphist);
 3178         UVMHIST_LOG(maphist,"(map=0x%x,flags=0x%x)", map, flags, 0, 0);
 3179 
 3180         KASSERT(map->flags & VM_MAP_PAGEABLE);
 3181 
 3182         vm_map_lock(map);
 3183 
 3184         /*
 3185          * handle wiring and unwiring separately.
 3186          */
 3187 
 3188         if (flags == 0) {                       /* unwire */
 3189 
 3190                 /*
 3191                  * POSIX 1003.1b -- munlockall unlocks all regions,
 3192                  * regardless of how many times mlockall has been called.
 3193                  */
 3194 
 3195                 for (entry = map->header.next; entry != &map->header;
 3196                      entry = entry->next) {
 3197                         if (VM_MAPENT_ISWIRED(entry))
 3198                                 uvm_map_entry_unwire(map, entry);
 3199                 }
 3200                 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
 3201                 vm_map_unlock(map);
 3202                 UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0);
 3203                 return 0;
 3204         }
 3205 
 3206         if (flags & MCL_FUTURE) {
 3207 
 3208                 /*
 3209                  * must wire all future mappings; remember this.
 3210                  */
 3211 
 3212                 vm_map_modflags(map, VM_MAP_WIREFUTURE, 0);
 3213         }
 3214 
 3215         if ((flags & MCL_CURRENT) == 0) {
 3216 
 3217                 /*
 3218                  * no more work to do!
 3219                  */
 3220 
 3221                 UVMHIST_LOG(maphist,"<- done (OK no wire)",0,0,0,0);
 3222                 vm_map_unlock(map);
 3223                 return 0;
 3224         }
 3225 
 3226         /*
 3227          * wire case: in three passes [XXXCDC: ugly block of code here]
 3228          *
 3229          * 1: holding the write lock, count all pages mapped by non-wired
 3230          *    entries.  if this would cause us to go over our limit, we fail.
 3231          *
 3232          * 2: still holding the write lock, we create any anonymous maps that
 3233          *    need to be created.  then we increment its wiring count.
 3234          *
 3235          * 3: we downgrade to a read lock, and call uvm_fault_wire to fault
 3236          *    in the pages for any newly wired area (wired_count == 1).
 3237          *
 3238          *    downgrading to a read lock for uvm_fault_wire avoids a possible
 3239          *    deadlock with another thread that may have faulted on one of
 3240          *    the pages to be wired (it would mark the page busy, blocking
 3241          *    us, then in turn block on the map lock that we hold).  because
 3242          *    of problems in the recursive lock package, we cannot upgrade
 3243          *    to a write lock in vm_map_lookup.  thus, any actions that
 3244          *    require the write lock must be done beforehand.  because we
 3245          *    keep the read lock on the map, the copy-on-write status of the
 3246          *    entries we modify here cannot change.
 3247          */
 3248 
 3249         for (size = 0, entry = map->header.next; entry != &map->header;
 3250              entry = entry->next) {
 3251                 if (entry->protection != VM_PROT_NONE &&
 3252                     VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
 3253                         size += entry->end - entry->start;
 3254                 }
 3255         }
 3256 
 3257         if (atop(size) + uvmexp.wired > uvmexp.wiredmax) {
 3258                 vm_map_unlock(map);
 3259                 return ENOMEM;
 3260         }
 3261 
 3262         if (limit != 0 &&
 3263             (size + ptoa(pmap_wired_count(vm_map_pmap(map))) > limit)) {
 3264                 vm_map_unlock(map);
 3265                 return ENOMEM;
 3266         }
 3267 
 3268         /*
 3269          * Pass 2.
 3270          */
 3271 
 3272         for (entry = map->header.next; entry != &map->header;
 3273              entry = entry->next) {
 3274                 if (entry->protection == VM_PROT_NONE)
 3275                         continue;
 3276                 if (VM_MAPENT_ISWIRED(entry) == 0) { /* not already wired? */
 3277 
 3278                         /*
 3279                          * perform actions of vm_map_lookup that need the
 3280                          * write lock on the map: create an anonymous map
 3281                          * for a copy-on-write region, or an anonymous map
 3282                          * for a zero-fill region.  (XXXCDC: submap case
 3283                          * ok?)
 3284                          */
 3285 
 3286                         if (!UVM_ET_ISSUBMAP(entry)) {  /* not submap */
 3287                                 if (UVM_ET_ISNEEDSCOPY(entry) &&
 3288                                     ((entry->max_protection & VM_PROT_WRITE) ||
 3289                                      (entry->object.uvm_obj == NULL))) {
 3290                                         amap_copy(map, entry, M_WAITOK, TRUE,
 3291                                             entry->start, entry->end);
 3292                                         /* XXXCDC: wait OK? */
 3293                                 }
 3294                         }
 3295                 }
 3296                 entry->wired_count++;
 3297         }
 3298 
 3299         /*
 3300          * Pass 3.
 3301          */
 3302 
 3303 #ifdef DIAGNOSTIC
 3304         timestamp_save = map->timestamp;
 3305 #endif
 3306         vm_map_busy(map);
 3307         vm_map_downgrade(map);
 3308 
 3309         rv = 0;
 3310         for (entry = map->header.next; entry != &map->header;
 3311              entry = entry->next) {
 3312                 if (entry->wired_count == 1) {
 3313                         rv = uvm_fault_wire(map, entry->start, entry->end,
 3314                             VM_FAULT_WIREMAX, entry->max_protection);
 3315                         if (rv) {
 3316 
 3317                                 /*
 3318                                  * wiring failed.  break out of the loop.
 3319                                  * we'll clean up the map below, once we
 3320                                  * have a write lock again.
 3321                                  */
 3322 
 3323                                 break;
 3324                         }
 3325                 }
 3326         }
 3327 
 3328         if (rv) {
 3329 
 3330                 /*
 3331                  * Get back an exclusive (write) lock.
 3332                  */
 3333 
 3334                 vm_map_upgrade(map);
 3335                 vm_map_unbusy(map);
 3336 
 3337 #ifdef DIAGNOSTIC
 3338                 if (timestamp_save != map->timestamp)
 3339                         panic("uvm_map_pageable_all: stale map");
 3340 #endif
 3341 
 3342                 /*
 3343                  * first drop the wiring count on all the entries
 3344                  * which haven't actually been wired yet.
 3345                  *
 3346                  * Skip VM_PROT_NONE entries like we did above.
 3347                  */
 3348 
 3349                 failed_entry = entry;
 3350                 for (/* nothing */; entry != &map->header;
 3351                      entry = entry->next) {
 3352                         if (entry->protection == VM_PROT_NONE)
 3353                                 continue;
 3354                         entry->wired_count--;
 3355                 }
 3356 
 3357                 /*
 3358                  * now, unwire all the entries that were successfully
 3359                  * wired above.
 3360                  *
 3361                  * Skip VM_PROT_NONE entries like we did above.
 3362                  */
 3363 
 3364                 for (entry = map->header.next; entry != failed_entry;
 3365                      entry = entry->next) {
 3366                         if (entry->protection == VM_PROT_NONE)
 3367                                 continue;
 3368                         entry->wired_count--;
 3369                         if (VM_MAPENT_ISWIRED(entry))
 3370                                 uvm_map_entry_unwire(map, entry);
 3371                 }
 3372                 vm_map_unlock(map);
 3373                 UVMHIST_LOG(maphist,"<- done (RV=%d)", rv,0,0,0);
 3374                 return (rv);
 3375         }
 3376 
 3377         /* We are holding a read lock here. */
 3378         vm_map_unbusy(map);
 3379         vm_map_unlock_read(map);
 3380 
 3381         UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0);
 3382         return 0;
 3383 }
 3384 
 3385 /*
 3386  * uvm_map_clean: clean out a map range
 3387  *
 3388  * => valid flags:
 3389  *   if (flags & PGO_CLEANIT): dirty pages are cleaned first
 3390  *   if (flags & PGO_SYNCIO): dirty pages are written synchronously
 3391  *   if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
 3392  *   if (flags & PGO_FREE): any cached pages are freed after clean
 3393  * => returns an error if any part of the specified range isn't mapped
 3394  * => never a need to flush amap layer since the anonymous memory has
 3395  *      no permanent home, but may deactivate pages there
 3396  * => called from sys_msync() and sys_madvise()
 3397  * => caller must not write-lock map (read OK).
 3398  * => we may sleep while cleaning if SYNCIO [with map read-locked]
 3399  */
 3400 
 3401 int
 3402 uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
 3403 {
 3404         struct vm_map_entry *current, *entry;
 3405         struct uvm_object *uobj;
 3406         struct vm_amap *amap;
 3407         struct vm_anon *anon;
 3408         struct vm_page *pg;
 3409         vaddr_t offset;
 3410         vsize_t size;
 3411         voff_t uoff;
 3412         int error, refs;
 3413         UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist);
 3414 
 3415         UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,flags=0x%x)",
 3416                     map, start, end, flags);
 3417         KASSERT((flags & (PGO_FREE|PGO_DEACTIVATE)) !=
 3418                 (PGO_FREE|PGO_DEACTIVATE));
 3419 
 3420         vm_map_lock_read(map);
 3421         VM_MAP_RANGE_CHECK(map, start, end);
 3422         if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
 3423                 vm_map_unlock_read(map);
 3424                 return EFAULT;
 3425         }
 3426 
 3427         /*
 3428          * Make a first pass to check for holes and wiring problems.
 3429          */
 3430 
 3431         for (current = entry; current->start < end; current = current->next) {
 3432                 if (UVM_ET_ISSUBMAP(current)) {
 3433                         vm_map_unlock_read(map);
 3434                         return EINVAL;
 3435                 }
 3436                 if ((flags & PGO_FREE) != 0 && VM_MAPENT_ISWIRED(entry)) {
 3437                         vm_map_unlock_read(map);
 3438                         return EBUSY;
 3439                 }
 3440                 if (end <= current->end) {
 3441                         break;
 3442                 }
 3443                 if (current->end != current->next->start) {
 3444                         vm_map_unlock_read(map);
 3445                         return EFAULT;
 3446                 }
 3447         }
 3448 
 3449         error = 0;
 3450         for (current = entry; start < end; current = current->next) {
 3451                 amap = current->aref.ar_amap;   /* top layer */
 3452                 uobj = current->object.uvm_obj; /* bottom layer */
 3453                 KASSERT(start >= current->start);
 3454 
 3455                 /*
 3456                  * No amap cleaning necessary if:
 3457                  *
 3458                  *      (1) There's no amap.
 3459                  *
 3460                  *      (2) We're not deactivating or freeing pages.
 3461                  */
 3462 
 3463                 if (amap == NULL || (flags & (PGO_DEACTIVATE|PGO_FREE)) == 0)
 3464                         goto flush_object;
 3465 
 3466                 amap_lock(amap);
 3467                 offset = start - current->start;
 3468                 size = MIN(end, current->end) - start;
 3469                 for ( ; size != 0; size -= PAGE_SIZE, offset += PAGE_SIZE) {
 3470                         anon = amap_lookup(&current->aref, offset);
 3471                         if (anon == NULL)
 3472                                 continue;
 3473 
 3474                         simple_lock(&anon->an_lock);
 3475                         pg = anon->u.an_page;
 3476                         if (pg == NULL) {
 3477                                 simple_unlock(&anon->an_lock);
 3478                                 continue;
 3479                         }
 3480 
 3481                         switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
 3482 
 3483                         /*
 3484                          * In these first 3 cases, we just deactivate the page.
 3485                          */
 3486 
 3487                         case PGO_CLEANIT|PGO_FREE:
 3488                         case PGO_CLEANIT|PGO_DEACTIVATE:
 3489                         case PGO_DEACTIVATE:
 3490  deactivate_it:
 3491                                 /*
 3492                                  * skip the page if it's loaned or wired,
 3493                                  * since it shouldn't be on a paging queue
 3494                                  * at all in these cases.
 3495                                  */
 3496 
 3497                                 uvm_lock_pageq();
 3498                                 if (pg->loan_count != 0 ||
 3499                                     pg->wire_count != 0) {
 3500                                         uvm_unlock_pageq();
 3501                                         simple_unlock(&anon->an_lock);
 3502                                         continue;
 3503                                 }
 3504                                 KASSERT(pg->uanon == anon);
 3505                                 pmap_clear_reference(pg);
 3506                                 uvm_pagedeactivate(pg);
 3507                                 uvm_unlock_pageq();
 3508                                 simple_unlock(&anon->an_lock);
 3509                                 continue;
 3510 
 3511                         case PGO_FREE:
 3512 
 3513                                 /*
 3514                                  * If there are multiple references to
 3515                                  * the amap, just deactivate the page.
 3516                                  */
 3517 
 3518                                 if (amap_refs(amap) > 1)
 3519                                         goto deactivate_it;
 3520 
 3521                                 /* skip the page if it's wired */
 3522                                 if (pg->wire_count != 0) {
 3523                                         simple_unlock(&anon->an_lock);
 3524                                         continue;
 3525                                 }
 3526                                 amap_unadd(&current->aref, offset);
 3527                                 refs = --anon->an_ref;
 3528                                 simple_unlock(&anon->an_lock);
 3529                                 if (refs == 0)
 3530                                         uvm_anfree(anon);
 3531                                 continue;
 3532                         }
 3533                 }
 3534                 amap_unlock(amap);
 3535 
 3536  flush_object:
 3537                 /*
 3538                  * flush pages if we've got a valid backing object.
 3539                  * note that we must always clean object pages before
 3540                  * freeing them since otherwise we could reveal stale
 3541                  * data from files.
 3542                  */
 3543 
 3544                 uoff = current->offset + (start - current->start);
 3545                 size = MIN(end, current->end) - start;
 3546                 if (uobj != NULL) {
 3547                         simple_lock(&uobj->vmobjlock);
 3548                         if (uobj->pgops->pgo_put != NULL)
 3549                                 error = (uobj->pgops->pgo_put)(uobj, uoff,
 3550                                     uoff + size, flags | PGO_CLEANIT);
 3551                         else
 3552                                 error = 0;
 3553                 }
 3554                 start += size;
 3555         }
 3556         vm_map_unlock_read(map);
 3557         return (error);
 3558 }
 3559 
 3560 
 3561 /*
 3562  * uvm_map_checkprot: check protection in map
 3563  *
 3564  * => must allow specified protection in a fully allocated region.
 3565  * => map must be read or write locked by caller.
 3566  */
 3567 
 3568 boolean_t
 3569 uvm_map_checkprot(struct vm_map *map, vaddr_t start, vaddr_t end,
 3570     vm_prot_t protection)
 3571 {
 3572         struct vm_map_entry *entry;
 3573         struct vm_map_entry *tmp_entry;
 3574 
 3575         if (!uvm_map_lookup_entry(map, start, &tmp_entry)) {
 3576                 return (FALSE);
 3577         }
 3578         entry = tmp_entry;
 3579         while (start < end) {
 3580                 if (entry == &map->header) {
 3581                         return (FALSE);
 3582                 }
 3583 
 3584                 /*
 3585                  * no holes allowed
 3586                  */
 3587 
 3588                 if (start < entry->start) {
 3589                         return (FALSE);
 3590                 }
 3591 
 3592                 /*
 3593                  * check protection associated with entry
 3594                  */
 3595 
 3596                 if ((entry->protection & protection) != protection) {
 3597                         return (FALSE);
 3598                 }
 3599                 start = entry->end;
 3600                 entry = entry->next;
 3601         }
 3602         return (TRUE);
 3603 }
 3604 
 3605 /*
 3606  * uvmspace_alloc: allocate a vmspace structure.
 3607  *
 3608  * - structure includes vm_map and pmap
 3609  * - XXX: no locking on this structure
 3610  * - refcnt set to 1, rest must be init'd by caller
 3611  */
 3612 struct vmspace *
 3613 uvmspace_alloc(vaddr_t min, vaddr_t max)
 3614 {
 3615         struct vmspace *vm;
 3616         UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist);
 3617 
 3618         vm = pool_get(&uvm_vmspace_pool, PR_WAITOK);
 3619         uvmspace_init(vm, NULL, min, max);
 3620         UVMHIST_LOG(maphist,"<- done (vm=0x%x)", vm,0,0,0);
 3621         return (vm);
 3622 }
 3623 
 3624 /*
 3625  * uvmspace_init: initialize a vmspace structure.
 3626  *
 3627  * - XXX: no locking on this structure
 3628  * - refcnt set to 1, rest must be init'd by caller
 3629  */
 3630 void
 3631 uvmspace_init(struct vmspace *vm, struct pmap *pmap, vaddr_t min, vaddr_t max)
 3632 {
 3633         UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist);
 3634 
 3635         memset(vm, 0, sizeof(*vm));
 3636         uvm_map_setup(&vm->vm_map, min, max, VM_MAP_PAGEABLE
 3637 #ifdef __USING_TOPDOWN_VM
 3638             | VM_MAP_TOPDOWN
 3639 #endif
 3640             );
 3641         if (pmap)
 3642                 pmap_reference(pmap);
 3643         else
 3644                 pmap = pmap_create();
 3645         vm->vm_map.pmap = pmap;
 3646         vm->vm_refcnt = 1;
 3647         UVMHIST_LOG(maphist,"<- done",0,0,0,0);
 3648 }
 3649 
 3650 /*
 3651  * uvmspace_share: share a vmspace between two processes
 3652  *
 3653  * - used for vfork, threads(?)
 3654  */
 3655 
 3656 void
 3657 uvmspace_share(struct proc *p1, struct proc *p2)
 3658 {
 3659         struct simplelock *slock = &p1->p_vmspace->vm_map.ref_lock;
 3660 
 3661         p2->p_vmspace = p1->p_vmspace;
 3662         simple_lock(slock);
 3663         p1->p_vmspace->vm_refcnt++;
 3664         simple_unlock(slock);
 3665 }
 3666 
 3667 /*
 3668  * uvmspace_unshare: ensure that process "p" has its own, unshared, vmspace
 3669  *
 3670  * - XXX: no locking on vmspace
 3671  */
 3672 
 3673 void
 3674 uvmspace_unshare(struct lwp *l)
 3675 {
 3676         struct proc *p = l->l_proc;
 3677         struct vmspace *nvm, *ovm = p->p_vmspace;
 3678 
 3679         if (ovm->vm_refcnt == 1)
 3680                 /* nothing to do: vmspace isn't shared in the first place */
 3681                 return;
 3682 
 3683         /* make a new vmspace, still holding old one */
 3684         nvm = uvmspace_fork(ovm);
 3685 
 3686         pmap_deactivate(l);             /* unbind old vmspace */
 3687         p->p_vmspace = nvm;
 3688         pmap_activate(l);               /* switch to new vmspace */
 3689 
 3690         uvmspace_free(ovm);             /* drop reference to old vmspace */
 3691 }
 3692 
 3693 /*
 3694  * uvmspace_exec: the process wants to exec a new program
 3695  */
 3696 
 3697 void
 3698 uvmspace_exec(struct lwp *l, vaddr_t start, vaddr_t end)
 3699 {
 3700         struct proc *p = l->l_proc;
 3701         struct vmspace *nvm, *ovm = p->p_vmspace;
 3702         struct vm_map *map = &ovm->vm_map;
 3703 
 3704 #ifdef __sparc__
 3705         /* XXX cgd 960926: the sparc #ifdef should be a MD hook */
 3706         kill_user_windows(l);   /* before stack addresses go away */
 3707 #endif
 3708 
 3709         /*
 3710          * see if more than one process is using this vmspace...
 3711          */
 3712 
 3713         if (ovm->vm_refcnt == 1) {
 3714 
 3715                 /*
 3716                  * if p is the only process using its vmspace then we can safely
 3717                  * recycle that vmspace for the program that is being exec'd.
 3718                  */
 3719 
 3720 #ifdef SYSVSHM
 3721                 /*
 3722                  * SYSV SHM semantics require us to kill all segments on an exec
 3723                  */
 3724 
 3725                 if (ovm->vm_shm)
 3726                         shmexit(ovm);
 3727 #endif
 3728 
 3729                 /*
 3730                  * POSIX 1003.1b -- "lock future mappings" is revoked
 3731                  * when a process execs another program image.
 3732                  */
 3733 
 3734                 vm_map_modflags(map, 0, VM_MAP_WIREFUTURE);
 3735 
 3736                 /*
 3737                  * now unmap the old program
 3738                  */
 3739 
 3740                 pmap_remove_all(map->pmap);
 3741                 uvm_unmap(map, vm_map_min(map), vm_map_max(map));
 3742                 KASSERT(map->header.prev == &map->header);
 3743                 KASSERT(map->nentries == 0);
 3744 
 3745                 /*
 3746                  * resize the map
 3747                  */
 3748 
 3749                 vm_map_setmin(map, start);
 3750                 vm_map_setmax(map, end);
 3751         } else {
 3752 
 3753                 /*
 3754                  * p's vmspace is being shared, so we can't reuse it for p since
 3755                  * it is still being used for others.   allocate a new vmspace
 3756                  * for p
 3757                  */
 3758 
 3759                 nvm = uvmspace_alloc(start, end);
 3760 
 3761                 /*
 3762                  * install new vmspace and drop our ref to the old one.
 3763                  */
 3764 
 3765                 pmap_deactivate(l);
 3766                 p->p_vmspace = nvm;
 3767                 pmap_activate(l);
 3768 
 3769                 uvmspace_free(ovm);
 3770         }
 3771 }
 3772 
 3773 /*
 3774  * uvmspace_free: free a vmspace data structure
 3775  */
 3776 
 3777 void
 3778 uvmspace_free(struct vmspace *vm)
 3779 {
 3780         struct vm_map_entry *dead_entries;
 3781         struct vm_map *map = &vm->vm_map;
 3782         int n;
 3783 
 3784         UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist);
 3785 
 3786         UVMHIST_LOG(maphist,"(vm=0x%x) ref=%d", vm, vm->vm_refcnt,0,0);
 3787         simple_lock(&map->ref_lock);
 3788         n = --vm->vm_refcnt;
 3789         simple_unlock(&map->ref_lock);
 3790         if (n > 0)
 3791                 return;
 3792 
 3793         /*
 3794          * at this point, there should be no other references to the map.
 3795          * delete all of the mappings, then destroy the pmap.
 3796          */
 3797 
 3798         map->flags |= VM_MAP_DYING;
 3799         pmap_remove_all(map->pmap);
 3800 #ifdef SYSVSHM
 3801         /* Get rid of any SYSV shared memory segments. */
 3802         if (vm->vm_shm != NULL)
 3803                 shmexit(vm);
 3804 #endif
 3805         if (map->nentries) {
 3806                 uvm_unmap_remove(map, vm_map_min(map), vm_map_max(map),
 3807                     &dead_entries, NULL);
 3808                 if (dead_entries != NULL)
 3809                         uvm_unmap_detach(dead_entries, 0);
 3810         }
 3811         KASSERT(map->nentries == 0);
 3812         KASSERT(map->size == 0);
 3813         pmap_destroy(map->pmap);
 3814         pool_put(&uvm_vmspace_pool, vm);
 3815 }
 3816 
 3817 /*
 3818  *   F O R K   -   m a i n   e n t r y   p o i n t
 3819  */
 3820 /*
 3821  * uvmspace_fork: fork a process' main map
 3822  *
 3823  * => create a new vmspace for child process from parent.
 3824  * => parent's map must not be locked.
 3825  */
 3826 
 3827 struct vmspace *
 3828 uvmspace_fork(struct vmspace *vm1)
 3829 {
 3830         struct vmspace *vm2;
 3831         struct vm_map *old_map = &vm1->vm_map;
 3832         struct vm_map *new_map;
 3833         struct vm_map_entry *old_entry;
 3834         struct vm_map_entry *new_entry;
 3835         UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist);
 3836 
 3837         vm_map_lock(old_map);
 3838 
 3839         vm2 = uvmspace_alloc(vm_map_min(old_map), vm_map_max(old_map));
 3840         memcpy(&vm2->vm_startcopy, &vm1->vm_startcopy,
 3841             (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
 3842         new_map = &vm2->vm_map;           /* XXX */
 3843 
 3844         old_entry = old_map->header.next;
 3845         new_map->size = old_map->size;
 3846 
 3847         /*
 3848          * go entry-by-entry
 3849          */
 3850 
 3851         while (old_entry != &old_map->header) {
 3852 
 3853                 /*
 3854                  * first, some sanity checks on the old entry
 3855                  */
 3856 
 3857                 KASSERT(!UVM_ET_ISSUBMAP(old_entry));
 3858                 KASSERT(UVM_ET_ISCOPYONWRITE(old_entry) ||
 3859                         !UVM_ET_ISNEEDSCOPY(old_entry));
 3860 
 3861                 switch (old_entry->inheritance) {
 3862                 case MAP_INHERIT_NONE:
 3863 
 3864                         /*
 3865                          * drop the mapping, modify size
 3866                          */
 3867                         new_map->size -= old_entry->end - old_entry->start;
 3868                         break;
 3869 
 3870                 case MAP_INHERIT_SHARE:
 3871 
 3872                         /*
 3873                          * share the mapping: this means we want the old and
 3874                          * new entries to share amaps and backing objects.
 3875                          */
 3876                         /*
 3877                          * if the old_entry needs a new amap (due to prev fork)
 3878                          * then we need to allocate it now so that we have
 3879                          * something we own to share with the new_entry.   [in
 3880                          * other words, we need to clear needs_copy]
 3881                          */
 3882 
 3883                         if (UVM_ET_ISNEEDSCOPY(old_entry)) {
 3884                                 /* get our own amap, clears needs_copy */
 3885                                 amap_copy(old_map, old_entry, M_WAITOK, FALSE,
 3886                                     0, 0);
 3887                                 /* XXXCDC: WAITOK??? */
 3888                         }
 3889 
 3890                         new_entry = uvm_mapent_alloc(new_map, 0);
 3891                         /* old_entry -> new_entry */
 3892                         uvm_mapent_copy(old_entry, new_entry);
 3893 
 3894                         /* new pmap has nothing wired in it */
 3895                         new_entry->wired_count = 0;
 3896 
 3897                         /*
 3898                          * gain reference to object backing the map (can't
 3899                          * be a submap, already checked this case).
 3900                          */
 3901 
 3902                         if (new_entry->aref.ar_amap)
 3903                                 uvm_map_reference_amap(new_entry, AMAP_SHARED);
 3904 
 3905                         if (new_entry->object.uvm_obj &&
 3906                             new_entry->object.uvm_obj->pgops->pgo_reference)
 3907                                 new_entry->object.uvm_obj->
 3908                                     pgops->pgo_reference(
 3909                                         new_entry->object.uvm_obj);
 3910 
 3911                         /* insert entry at end of new_map's entry list */
 3912                         uvm_map_entry_link(new_map, new_map->header.prev,
 3913                             new_entry);
 3914 
 3915                         break;
 3916 
 3917                 case MAP_INHERIT_COPY:
 3918 
 3919                         /*
 3920                          * copy-on-write the mapping (using mmap's
 3921                          * MAP_PRIVATE semantics)
 3922                          *
 3923                          * allocate new_entry, adjust reference counts.
 3924                          * (note that new references are read-only).
 3925                          */
 3926 
 3927                         new_entry = uvm_mapent_alloc(new_map, 0);
 3928                         /* old_entry -> new_entry */
 3929                         uvm_mapent_copy(old_entry, new_entry);
 3930 
 3931                         if (new_entry->aref.ar_amap)
 3932                                 uvm_map_reference_amap(new_entry, 0);
 3933 
 3934                         if (new_entry->object.uvm_obj &&
 3935                             new_entry->object.uvm_obj->pgops->pgo_reference)
 3936                                 new_entry->object.uvm_obj->pgops->pgo_reference
 3937                                     (new_entry->object.uvm_obj);
 3938 
 3939                         /* new pmap has nothing wired in it */
 3940                         new_entry->wired_count = 0;
 3941 
 3942                         new_entry->etype |=
 3943                             (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
 3944                         uvm_map_entry_link(new_map, new_map->header.prev,
 3945                             new_entry);
 3946 
 3947                         /*
 3948                          * the new entry will need an amap.  it will either
 3949                          * need to be copied from the old entry or created
 3950                          * from scratch (if the old entry does not have an
 3951                          * amap).  can we defer this process until later
 3952                          * (by setting "needs_copy") or do we need to copy
 3953                          * the amap now?
 3954                          *
 3955                          * we must copy the amap now if any of the following
 3956                          * conditions hold:
 3957                          * 1. the old entry has an amap and that amap is
 3958                          *    being shared.  this means that the old (parent)
 3959                          *    process is sharing the amap with another
 3960                          *    process.  if we do not clear needs_copy here
 3961                          *    we will end up in a situation where both the
 3962                          *    parent and child process are refering to the
 3963                          *    same amap with "needs_copy" set.  if the
 3964                          *    parent write-faults, the fault routine will
 3965                          *    clear "needs_copy" in the parent by allocating
 3966                          *    a new amap.   this is wrong because the
 3967                          *    parent is supposed to be sharing the old amap
 3968                          *    and the new amap will break that.
 3969                          *
 3970                          * 2. if the old entry has an amap and a non-zero
 3971                          *    wire count then we are going to have to call
 3972                          *    amap_cow_now to avoid page faults in the
 3973                          *    parent process.   since amap_cow_now requires
 3974                          *    "needs_copy" to be clear we might as well
 3975                          *    clear it here as well.
 3976                          *
 3977                          */
 3978 
 3979                         if (old_entry->aref.ar_amap != NULL) {
 3980                                 if ((amap_flags(old_entry->aref.ar_amap) &
 3981                                      AMAP_SHARED) != 0 ||
 3982                                     VM_MAPENT_ISWIRED(old_entry)) {
 3983 
 3984                                         amap_copy(new_map, new_entry, M_WAITOK,
 3985                                             FALSE, 0, 0);
 3986                                         /* XXXCDC: M_WAITOK ... ok? */
 3987                                 }
 3988                         }
 3989 
 3990                         /*
 3991                          * if the parent's entry is wired down, then the
 3992                          * parent process does not want page faults on
 3993                          * access to that memory.  this means that we
 3994                          * cannot do copy-on-write because we can't write
 3995                          * protect the old entry.   in this case we
 3996                          * resolve all copy-on-write faults now, using
 3997                          * amap_cow_now.   note that we have already
 3998                          * allocated any needed amap (above).
 3999                          */
 4000 
 4001                         if (VM_MAPENT_ISWIRED(old_entry)) {
 4002 
 4003                           /*
 4004                            * resolve all copy-on-write faults now
 4005                            * (note that there is nothing to do if
 4006                            * the old mapping does not have an amap).
 4007                            */
 4008                           if (old_entry->aref.ar_amap)
 4009                             amap_cow_now(new_map, new_entry);
 4010 
 4011                         } else {
 4012 
 4013                           /*
 4014                            * setup mappings to trigger copy-on-write faults
 4015                            * we must write-protect the parent if it has
 4016                            * an amap and it is not already "needs_copy"...
 4017                            * if it is already "needs_copy" then the parent
 4018                            * has already been write-protected by a previous
 4019                            * fork operation.
 4020                            */
 4021 
 4022                           if (old_entry->aref.ar_amap &&
 4023                               !UVM_ET_ISNEEDSCOPY(old_entry)) {
 4024                               if (old_entry->max_protection & VM_PROT_WRITE) {
 4025                                 pmap_protect(old_map->pmap,
 4026                                              old_entry->start,
 4027                                              old_entry->end,
 4028                                              old_entry->protection &
 4029                                              ~VM_PROT_WRITE);
 4030                                 pmap_update(old_map->pmap);
 4031                               }
 4032                               old_entry->etype |= UVM_ET_NEEDSCOPY;
 4033                           }
 4034                         }
 4035                         break;
 4036                 }  /* end of switch statement */
 4037                 old_entry = old_entry->next;
 4038         }
 4039 
 4040         vm_map_unlock(old_map);
 4041 
 4042 #ifdef SYSVSHM
 4043         if (vm1->vm_shm)
 4044                 shmfork(vm1, vm2);
 4045 #endif
 4046 
 4047 #ifdef PMAP_FORK
 4048         pmap_fork(vm1->vm_map.pmap, vm2->vm_map.pmap);
 4049 #endif
 4050 
 4051         UVMHIST_LOG(maphist,"<- done",0,0,0,0);
 4052         return (vm2);
 4053 }
 4054 
 4055 
 4056 /*
 4057  * in-kernel map entry allocation.
 4058  */
 4059 
 4060 int ukh_alloc, ukh_free;
 4061 int uke_alloc, uke_free;
 4062 
 4063 struct uvm_kmapent_hdr {
 4064         LIST_ENTRY(uvm_kmapent_hdr) ukh_listq;
 4065         int ukh_nused;
 4066         struct vm_map_entry *ukh_freelist;
 4067         struct vm_map *ukh_map;
 4068         struct vm_map_entry ukh_entries[0];
 4069 };
 4070 
 4071 #define UVM_KMAPENT_CHUNK                               \
 4072         ((PAGE_SIZE - sizeof(struct uvm_kmapent_hdr))   \
 4073         / sizeof(struct vm_map_entry))
 4074 
 4075 #define UVM_KHDR_FIND(entry)    \
 4076         ((struct uvm_kmapent_hdr *)(((vaddr_t)entry) & ~PAGE_MASK))
 4077 
 4078 static __inline struct vm_map_entry *uvm_kmapent_get(struct uvm_kmapent_hdr *);
 4079 static __inline void uvm_kmapent_put(struct uvm_kmapent_hdr *,
 4080     struct vm_map_entry *);
 4081 
 4082 static __inline struct vm_map *
 4083 uvm_kmapent_map(struct vm_map_entry *entry)
 4084 {
 4085         const struct uvm_kmapent_hdr *ukh;
 4086 
 4087         ukh = UVM_KHDR_FIND(entry);
 4088         return ukh->ukh_map;
 4089 }
 4090 
 4091 static __inline struct vm_map_entry *
 4092 uvm_kmapent_get(struct uvm_kmapent_hdr *ukh)
 4093 {
 4094         struct vm_map_entry *entry;
 4095 
 4096         KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK);
 4097         KASSERT(ukh->ukh_nused >= 0);
 4098 
 4099         entry = ukh->ukh_freelist;
 4100         if (entry) {
 4101                 KASSERT((entry->flags & (UVM_MAP_KERNEL | UVM_MAP_KMAPENT))
 4102                     == UVM_MAP_KERNEL);
 4103                 ukh->ukh_freelist = entry->next;
 4104                 ukh->ukh_nused++;
 4105                 KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK);
 4106         } else {
 4107                 KASSERT(ukh->ukh_nused == UVM_KMAPENT_CHUNK);
 4108         }
 4109 
 4110         return entry;
 4111 }
 4112 
 4113 static __inline void
 4114 uvm_kmapent_put(struct uvm_kmapent_hdr *ukh, struct vm_map_entry *entry)
 4115 {
 4116 
 4117         KASSERT((entry->flags & (UVM_MAP_KERNEL | UVM_MAP_KMAPENT))
 4118             == UVM_MAP_KERNEL);
 4119         KASSERT(ukh->ukh_nused <= UVM_KMAPENT_CHUNK);
 4120         KASSERT(ukh->ukh_nused > 0);
 4121         KASSERT(ukh->ukh_freelist != NULL ||
 4122             ukh->ukh_nused == UVM_KMAPENT_CHUNK);
 4123         KASSERT(ukh->ukh_freelist == NULL ||
 4124             ukh->ukh_nused < UVM_KMAPENT_CHUNK);
 4125 
 4126         ukh->ukh_nused--;
 4127         entry->next = ukh->ukh_freelist;
 4128         ukh->ukh_freelist = entry;
 4129 }
 4130 
 4131 /*
 4132  * uvm_kmapent_alloc: allocate a map entry for in-kernel map
 4133  */
 4134 
 4135 static struct vm_map_entry *
 4136 uvm_kmapent_alloc(struct vm_map *map, int flags)
 4137 {
 4138         struct vm_page *pg;
 4139         struct uvm_map_args args;
 4140         struct uvm_kmapent_hdr *ukh;
 4141         struct vm_map_entry *entry;
 4142         uvm_flag_t mapflags = UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL,
 4143             UVM_INH_NONE, UVM_ADV_RANDOM, flags | UVM_FLAG_NOMERGE);
 4144         vaddr_t va;
 4145         int error;
 4146         int i;
 4147         int s;
 4148 
 4149         KDASSERT(UVM_KMAPENT_CHUNK > 2);
 4150         KDASSERT(kernel_map != NULL);
 4151         KASSERT(vm_map_pmap(map) == pmap_kernel());
 4152 
 4153         uke_alloc++;
 4154         entry = NULL;
 4155 again:
 4156         /*
 4157          * try to grab an entry from freelist.
 4158          */
 4159         s = splvm();
 4160         simple_lock(&uvm.kentry_lock);
 4161         ukh = LIST_FIRST(&vm_map_to_kernel(map)->vmk_kentry_free);
 4162         if (ukh) {
 4163                 entry = uvm_kmapent_get(ukh);
 4164                 if (ukh->ukh_nused == UVM_KMAPENT_CHUNK)
 4165                         LIST_REMOVE(ukh, ukh_listq);
 4166         }
 4167         simple_unlock(&uvm.kentry_lock);
 4168         splx(s);
 4169 
 4170         if (entry)
 4171                 return entry;
 4172 
 4173         /*
 4174          * there's no free entry for this vm_map.
 4175          * now we need to allocate some vm_map_entry.
 4176          * for simplicity, always allocate one page chunk of them at once.
 4177          */
 4178 
 4179         pg = uvm_pagealloc(NULL, 0, NULL, 0);
 4180         if (__predict_false(pg == NULL)) {
 4181                 if (flags & UVM_FLAG_NOWAIT)
 4182                         return NULL;
 4183                 uvm_wait("kme_alloc");
 4184                 goto again;
 4185         }
 4186 
 4187         error = uvm_map_prepare(map, 0, PAGE_SIZE, NULL, 0, 0, mapflags, &args);
 4188         if (error) {
 4189                 uvm_pagefree(pg);
 4190                 return NULL;
 4191         }
 4192 
 4193         va = args.uma_start;
 4194 
 4195         pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), VM_PROT_READ|VM_PROT_WRITE);
 4196         pmap_update(vm_map_pmap(map));
 4197 
 4198         ukh = (void *)va;
 4199 
 4200         /*
 4201          * use the first entry for ukh itsself.
 4202          */
 4203 
 4204         entry = &ukh->ukh_entries[0];
 4205         entry->flags = UVM_MAP_KERNEL | UVM_MAP_KMAPENT;
 4206         error = uvm_map_enter(map, &args, entry);
 4207         KASSERT(error == 0);
 4208 
 4209         ukh->ukh_nused = UVM_KMAPENT_CHUNK;
 4210         ukh->ukh_map = map;
 4211         ukh->ukh_freelist = NULL;
 4212         for (i = UVM_KMAPENT_CHUNK - 1; i >= 2; i--) {
 4213                 struct vm_map_entry *entry = &ukh->ukh_entries[i];
 4214 
 4215                 entry->flags = UVM_MAP_KERNEL;
 4216                 uvm_kmapent_put(ukh, entry);
 4217         }
 4218         KASSERT(ukh->ukh_nused == 2);
 4219 
 4220         s = splvm();
 4221         simple_lock(&uvm.kentry_lock);
 4222         LIST_INSERT_HEAD(&vm_map_to_kernel(map)->vmk_kentry_free,
 4223             ukh, ukh_listq);
 4224         simple_unlock(&uvm.kentry_lock);
 4225         splx(s);
 4226 
 4227         /*
 4228          * return second entry.
 4229          */
 4230 
 4231         entry = &ukh->ukh_entries[1];
 4232         entry->flags = UVM_MAP_KERNEL;
 4233         ukh_alloc++;
 4234         return entry;
 4235 }
 4236 
 4237 /*
 4238  * uvm_mapent_free: free map entry for in-kernel map
 4239  */
 4240 
 4241 static void
 4242 uvm_kmapent_free(struct vm_map_entry *entry)
 4243 {
 4244         struct uvm_kmapent_hdr *ukh;
 4245         struct vm_page *pg;
 4246         struct vm_map *map;
 4247         struct pmap *pmap;
 4248         vaddr_t va;
 4249         paddr_t pa;
 4250         struct vm_map_entry *deadentry;
 4251         int s;
 4252 
 4253         uke_free++;
 4254         ukh = UVM_KHDR_FIND(entry);
 4255         map = ukh->ukh_map;
 4256 
 4257         s = splvm();
 4258         simple_lock(&uvm.kentry_lock);
 4259         uvm_kmapent_put(ukh, entry);
 4260         if (ukh->ukh_nused > 1) {
 4261                 if (ukh->ukh_nused == UVM_KMAPENT_CHUNK - 1)
 4262                         LIST_INSERT_HEAD(
 4263                             &vm_map_to_kernel(map)->vmk_kentry_free,
 4264                             ukh, ukh_listq);
 4265                 simple_unlock(&uvm.kentry_lock);
 4266                 splx(s);
 4267                 return;
 4268         }
 4269 
 4270         /*
 4271          * now we can free this ukh.
 4272          *
 4273          * however, keep an empty ukh to avoid ping-pong.
 4274          */
 4275 
 4276         if (LIST_FIRST(&vm_map_to_kernel(map)->vmk_kentry_free) == ukh &&
 4277             LIST_NEXT(ukh, ukh_listq) == NULL) {
 4278                 simple_unlock(&uvm.kentry_lock);
 4279                 splx(s);
 4280                 return;
 4281         }
 4282         LIST_REMOVE(ukh, ukh_listq);
 4283         simple_unlock(&uvm.kentry_lock);
 4284         splx(s);
 4285 
 4286         KASSERT(ukh->ukh_nused == 1);
 4287 
 4288         /*
 4289          * remove map entry for ukh itsself.
 4290          */
 4291 
 4292         va = (vaddr_t)ukh;
 4293         KASSERT((va & PAGE_MASK) == 0);
 4294         uvm_unmap_remove(map, va, va + PAGE_SIZE, &deadentry, NULL);
 4295         KASSERT(deadentry->flags & UVM_MAP_KERNEL);
 4296         KASSERT(deadentry->flags & UVM_MAP_KMAPENT);
 4297         KASSERT(deadentry->next == NULL);
 4298         KASSERT(deadentry == &ukh->ukh_entries[0]);
 4299 
 4300         /*
 4301          * unmap the page from pmap and free it.
 4302          */
 4303 
 4304         pmap = vm_map_pmap(map);
 4305         KASSERT(pmap == pmap_kernel());
 4306         if (!pmap_extract(pmap, va, &pa))
 4307                 panic("%s: no mapping", __func__);
 4308         pmap_kremove(va, PAGE_SIZE);
 4309         pg = PHYS_TO_VM_PAGE(pa);
 4310         uvm_pagefree(pg);
 4311         ukh_free++;
 4312 }
 4313 
 4314 /*
 4315  * map entry reservation
 4316  */
 4317 
 4318 /*
 4319  * uvm_mapent_reserve: reserve map entries for clipping before locking map.
 4320  *
 4321  * => needed when unmapping entries allocated without UVM_FLAG_QUANTUM.
 4322  * => caller shouldn't hold map locked.
 4323  */
 4324 int
 4325 uvm_mapent_reserve(struct vm_map *map, struct uvm_mapent_reservation *umr,
 4326     int nentries, int flags)
 4327 {
 4328 
 4329         umr->umr_nentries = 0;
 4330 
 4331         if ((flags & UVM_FLAG_QUANTUM) != 0)
 4332                 return 0;
 4333 
 4334         if (!VM_MAP_USE_KMAPENT(map))
 4335                 return 0;
 4336 
 4337         while (nentries--) {
 4338                 struct vm_map_entry *ent;
 4339                 ent = uvm_kmapent_alloc(map, flags);
 4340                 if (!ent) {
 4341                         uvm_mapent_unreserve(map, umr);
 4342                         return ENOMEM;
 4343                 }
 4344                 UMR_PUTENTRY(umr, ent);
 4345         }
 4346 
 4347         return 0;
 4348 }
 4349 
 4350 /*
 4351  * uvm_mapent_unreserve:
 4352  *
 4353  * => caller shouldn't hold map locked.
 4354  * => never fail or sleep.
 4355  */
 4356 void
 4357 uvm_mapent_unreserve(struct vm_map *map, struct uvm_mapent_reservation *umr)
 4358 {
 4359 
 4360         while (!UMR_EMPTY(umr))
 4361                 uvm_kmapent_free(UMR_GETENTRY(umr));
 4362 }
 4363 
 4364 #if defined(DDB)
 4365 
 4366 /*
 4367  * DDB hooks
 4368  */
 4369 
 4370 /*
 4371  * uvm_map_printit: actually prints the map
 4372  */
 4373 
 4374 void
 4375 uvm_map_printit(struct vm_map *map, boolean_t full,
 4376     void (*pr)(const char *, ...))
 4377 {
 4378         struct vm_map_entry *entry;
 4379 
 4380         (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, vm_map_min(map),
 4381             vm_map_max(map));
 4382         (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=0x%x\n",
 4383             map->nentries, map->size, map->ref_count, map->timestamp,
 4384             map->flags);
 4385         (*pr)("\tpmap=%p(resident=%ld, wired=%ld)\n", map->pmap,
 4386             pmap_resident_count(map->pmap), pmap_wired_count(map->pmap));
 4387         if (!full)
 4388                 return;
 4389         for (entry = map->header.next; entry != &map->header;
 4390             entry = entry->next) {
 4391                 (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%llx, amap=%p/%d\n",
 4392                     entry, entry->start, entry->end, entry->object.uvm_obj,
 4393                     (long long)entry->offset, entry->aref.ar_amap,
 4394                     entry->aref.ar_pageoff);
 4395                 (*pr)(
 4396                     "\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, "
 4397                     "wc=%d, adv=%d\n",
 4398                     (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
 4399                     (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
 4400                     (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
 4401                     entry->protection, entry->max_protection,
 4402                     entry->inheritance, entry->wired_count, entry->advice);
 4403         }
 4404 }
 4405 
 4406 /*
 4407  * uvm_object_printit: actually prints the object
 4408  */
 4409 
 4410 void
 4411 uvm_object_printit(struct uvm_object *uobj, boolean_t full,
 4412     void (*pr)(const char *, ...))
 4413 {
 4414         struct vm_page *pg;
 4415         int cnt = 0;
 4416 
 4417         (*pr)("OBJECT %p: locked=%d, pgops=%p, npages=%d, ",
 4418             uobj, uobj->vmobjlock.lock_data, uobj->pgops, uobj->uo_npages);
 4419         if (UVM_OBJ_IS_KERN_OBJECT(uobj))
 4420                 (*pr)("refs=<SYSTEM>\n");
 4421         else
 4422                 (*pr)("refs=%d\n", uobj->uo_refs);
 4423 
 4424         if (!full) {
 4425                 return;
 4426         }
 4427         (*pr)("  PAGES <pg,offset>:\n  ");
 4428         TAILQ_FOREACH(pg, &uobj->memq, listq) {
 4429                 cnt++;
 4430                 (*pr)("<%p,0x%llx> ", pg, (long long)pg->offset);
 4431                 if ((cnt % 3) == 0) {
 4432                         (*pr)("\n  ");
 4433                 }
 4434         }
 4435         if ((cnt % 3) != 0) {
 4436                 (*pr)("\n");
 4437         }
 4438 }
 4439 
 4440 /*
 4441  * uvm_page_printit: actually print the page
 4442  */
 4443 
 4444 static const char page_flagbits[] =
 4445         "\2\1BUSY\2WANTED\3TABLED\4CLEAN\5PAGEOUT\6RELEASED\7FAKE\10RDONLY"
 4446         "\11ZERO\15PAGER1";
 4447 static const char page_pqflagbits[] =
 4448         "\2\1FREE\2INACTIVE\3ACTIVE\5ANON\6AOBJ";
 4449 
 4450 void
 4451 uvm_page_printit(struct vm_page *pg, boolean_t full,
 4452     void (*pr)(const char *, ...))
 4453 {
 4454         struct vm_page *tpg;
 4455         struct uvm_object *uobj;
 4456         struct pglist *pgl;
 4457         char pgbuf[128];
 4458         char pqbuf[128];
 4459 
 4460         (*pr)("PAGE %p:\n", pg);
 4461         bitmask_snprintf(pg->flags, page_flagbits, pgbuf, sizeof(pgbuf));
 4462         bitmask_snprintf(pg->pqflags, page_pqflagbits, pqbuf, sizeof(pqbuf));
 4463         (*pr)("  flags=%s, pqflags=%s, wire_count=%d, pa=0x%lx\n",
 4464             pgbuf, pqbuf, pg->wire_count, (long)VM_PAGE_TO_PHYS(pg));
 4465         (*pr)("  uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n",
 4466             pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count);
 4467 #if defined(UVM_PAGE_TRKOWN)
 4468         if (pg->flags & PG_BUSY)
 4469                 (*pr)("  owning process = %d, tag=%s\n",
 4470                     pg->owner, pg->owner_tag);
 4471         else
 4472                 (*pr)("  page not busy, no owner\n");
 4473 #else
 4474         (*pr)("  [page ownership tracking disabled]\n");
 4475 #endif
 4476 
 4477         if (!full)
 4478                 return;
 4479 
 4480         /* cross-verify object/anon */
 4481         if ((pg->pqflags & PQ_FREE) == 0) {
 4482                 if (pg->pqflags & PQ_ANON) {
 4483                         if (pg->uanon == NULL || pg->uanon->u.an_page != pg)
 4484                             (*pr)("  >>> ANON DOES NOT POINT HERE <<< (%p)\n",
 4485                                 (pg->uanon) ? pg->uanon->u.an_page : NULL);
 4486                         else
 4487                                 (*pr)("  anon backpointer is OK\n");
 4488                 } else {
 4489                         uobj = pg->uobject;
 4490                         if (uobj) {
 4491                                 (*pr)("  checking object list\n");
 4492                                 TAILQ_FOREACH(tpg, &uobj->memq, listq) {
 4493                                         if (tpg == pg) {
 4494                                                 break;
 4495                                         }
 4496                                 }
 4497                                 if (tpg)
 4498                                         (*pr)("  page found on object list\n");
 4499                                 else
 4500                         (*pr)("  >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n");
 4501                         }
 4502                 }
 4503         }
 4504 
 4505         /* cross-verify page queue */
 4506         if (pg->pqflags & PQ_FREE) {
 4507                 int fl = uvm_page_lookup_freelist(pg);
 4508                 int color = VM_PGCOLOR_BUCKET(pg);
 4509                 pgl = &uvm.page_free[fl].pgfl_buckets[color].pgfl_queues[
 4510                     ((pg)->flags & PG_ZERO) ? PGFL_ZEROS : PGFL_UNKNOWN];
 4511         } else if (pg->pqflags & PQ_INACTIVE) {
 4512                 pgl = &uvm.page_inactive;
 4513         } else if (pg->pqflags & PQ_ACTIVE) {
 4514                 pgl = &uvm.page_active;
 4515         } else {
 4516                 pgl = NULL;
 4517         }
 4518 
 4519         if (pgl) {
 4520                 (*pr)("  checking pageq list\n");
 4521                 TAILQ_FOREACH(tpg, pgl, pageq) {
 4522                         if (tpg == pg) {
 4523                                 break;
 4524                         }
 4525                 }
 4526                 if (tpg)
 4527                         (*pr)("  page found on pageq list\n");
 4528                 else
 4529                         (*pr)("  >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
 4530         }
 4531 }
 4532 #endif

Cache object: 197cca8e404ffe463b06ae487b2aad48


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.