FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_map.c


    1 /*
    2  * Copyright (c) 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * The Mach Operating System project at Carnegie-Mellon University.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. All advertising materials mentioning features or use of this software
   17  *    must display the following acknowledgement:
   18  *      This product includes software developed by the University of
   19  *      California, Berkeley and its contributors.
   20  * 4. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      from: @(#)vm_map.c      8.3 (Berkeley) 1/12/94
   37  *
   38  *
   39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   40  * All rights reserved.
   41  *
   42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
   43  *
   44  * Permission to use, copy, modify and distribute this software and
   45  * its documentation is hereby granted, provided that both the copyright
   46  * notice and this permission notice appear in all copies of the
   47  * software, derivative works or modified versions, and any portions
   48  * thereof, and that both notices appear in supporting documentation.
   49  *
   50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   53  *
   54  * Carnegie Mellon requests users of this software to return to
   55  *
   56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   57  *  School of Computer Science
   58  *  Carnegie Mellon University
   59  *  Pittsburgh PA 15213-3890
   60  *
   61  * any improvements or extensions that they make and grant Carnegie the
   62  * rights to redistribute these changes.
   63  *
   64  * $FreeBSD$
   65  */
   66 
   67 /*
   68  *      Virtual memory mapping module.
   69  */
   70 
   71 #include <sys/param.h>
   72 #include <sys/systm.h>
   73 #include <sys/kernel.h>
   74 #include <sys/sysctl.h>
   75 #include <sys/proc.h>
   76 #include <sys/vmmeter.h>
   77 #include <sys/mman.h>
   78 #include <sys/vnode.h>
   79 #include <sys/resourcevar.h>
   80 #include <sys/file.h>
   81 
   82 #include <vm/vm.h>
   83 #include <vm/vm_param.h>
   84 #include <sys/lock.h>
   85 #include <vm/pmap.h>
   86 #include <vm/vm_map.h>
   87 #include <vm/vm_page.h>
   88 #include <vm/vm_object.h>
   89 #include <vm/vm_pager.h>
   90 #include <vm/vm_kern.h>
   91 #include <vm/vm_extern.h>
   92 #include <vm/swap_pager.h>
   93 #include <vm/vm_zone.h>
   94 
   95 /*
   96  *      Virtual memory maps provide for the mapping, protection,
   97  *      and sharing of virtual memory objects.  In addition,
   98  *      this module provides for an efficient virtual copy of
   99  *      memory from one map to another.
  100  *
  101  *      Synchronization is required prior to most operations.
  102  *
  103  *      Maps consist of an ordered doubly-linked list of simple
  104  *      entries; a single hint is used to speed up lookups.
  105  *
  106  *      Since portions of maps are specified by start/end addresses,
  107  *      which may not align with existing map entries, all
  108  *      routines merely "clip" entries to these start/end values.
  109  *      [That is, an entry is split into two, bordering at a
  110  *      start or end value.]  Note that these clippings may not
  111  *      always be necessary (as the two resulting entries are then
  112  *      not changed); however, the clipping is done for convenience.
  113  *
  114  *      As mentioned above, virtual copy operations are performed
  115  *      by copying VM object references from one map to
  116  *      another, and then marking both regions as copy-on-write.
  117  */
  118 
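/*
 *	A minimal sketch of the clipping rule described above, using
 *	hypothetical addresses: an operation on [0x2000, 0x4000) applied to a
 *	single entry covering [0x1000, 0x5000) first clips off [0x1000,
 *	0x2000) and [0x4000, 0x5000) into entries of their own, leaving a
 *	middle entry that exactly covers the requested range:
 *
 *		vm_map_clip_start(map, entry, 0x2000);
 *		vm_map_clip_end(map, entry, 0x4000);
 *		... modify the entry now covering exactly [0x2000, 0x4000) ...
 *		vm_map_simplify_entry(map, entry);
 *
 *	vm_map_simplify_entry() can re-merge the pieces afterwards when their
 *	attributes still match.
 */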
  119 /*
  120  *      vm_map_startup:
  121  *
  122  *      Initialize the vm_map module.  Must be called before
  123  *      any other vm_map routines.
  124  *
  125  *      Map and entry structures are allocated from the general
  126  *      purpose memory pool with some exceptions:
  127  *
  128  *      - The kernel map and kmem submap are allocated statically.
  129  *      - Kernel map entries are allocated out of a static pool.
  130  *
  131  *      These restrictions are necessary since malloc() uses the
  132  *      maps and requires map entries.
  133  */
  134 
  135 static struct vm_zone kmapentzone_store, mapentzone_store, mapzone_store;
  136 static vm_zone_t mapentzone, kmapentzone, mapzone, vmspace_zone;
  137 static struct vm_object kmapentobj, mapentobj, mapobj;
  138 
  139 static struct vm_map_entry map_entry_init[MAX_MAPENT];
  140 static struct vm_map_entry kmap_entry_init[MAX_KMAPENT];
  141 static struct vm_map map_init[MAX_KMAP];
  142 
  143 static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t));
  144 static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t));
  145 static vm_map_entry_t vm_map_entry_create __P((vm_map_t));
  146 static void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t));
  147 static void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t));
  148 static void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t));
  149 static void vm_map_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t,
  150                 vm_map_entry_t));
  151 static void vm_map_split __P((vm_map_entry_t));
  152 static void vm_map_unclip_range __P((vm_map_t map, vm_map_entry_t start_entry, vm_offset_t start, vm_offset_t end, int flags));
  153 
  154 static int old_msync;
  155 SYSCTL_INT(_vm, OID_AUTO, old_msync, CTLFLAG_RW, &old_msync, 0,
  156     "Use old (insecure) msync behavior");
  157 
  158 void
  159 vm_map_startup()
  160 {
  161         mapzone = &mapzone_store;
  162         zbootinit(mapzone, "MAP", sizeof (struct vm_map),
  163                 map_init, MAX_KMAP);
  164         kmapentzone = &kmapentzone_store;
  165         zbootinit(kmapentzone, "KMAP ENTRY", sizeof (struct vm_map_entry),
  166                 kmap_entry_init, MAX_KMAPENT);
  167         mapentzone = &mapentzone_store;
  168         zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry),
  169                 map_entry_init, MAX_MAPENT);
  170 }
  171 
  172 /*
  173  * Allocate a vmspace structure, including a vm_map and pmap,
  174  * and initialize those structures.  The refcnt is set to 1.
  175  * The remaining fields must be initialized by the caller.
  176  */
  177 struct vmspace *
  178 vmspace_alloc(min, max)
  179         vm_offset_t min, max;
  180 {
  181         struct vmspace *vm;
  182 
  183         vm = zalloc(vmspace_zone);
  184         vm_map_init(&vm->vm_map, min, max);
  185         pmap_pinit(vmspace_pmap(vm));
  186         vm->vm_map.pmap = vmspace_pmap(vm);             /* XXX */
  187         vm->vm_refcnt = 1;
  188         vm->vm_shm = NULL;
  189         vm->vm_exitingcnt = 0;
  190         return (vm);
  191 }
  192 
  193 void
  194 vm_init2(void) {
  195         zinitna(kmapentzone, &kmapentobj,
  196                 NULL, 0, lmin((VM_MAX_KERNEL_ADDRESS - KERNBASE) / PAGE_SIZE,
  197                 cnt.v_page_count) / 8 + maxproc * 2 + maxfiles,
  198                 ZONE_INTERRUPT, 1);
  199         zinitna(mapentzone, &mapentobj,
  200                 NULL, 0, 0, 0, 1);
  201         zinitna(mapzone, &mapobj,
  202                 NULL, 0, 0, 0, 1);
  203         vmspace_zone = zinit("VMSPACE", sizeof (struct vmspace), 0, 0, 3);
  204         pmap_init2();
  205         vm_object_init2();
  206 }
  207 
  208 static __inline void
  209 vmspace_dofree(struct vmspace *vm)
  210 {
  211         /*
  212          * Lock the map, to wait out all other references to it.
  213          * Delete all of the mappings and pages they hold, then call
  214          * the pmap module to reclaim anything left.
  215          */
  216         vm_map_lock(&vm->vm_map);
  217         (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
  218             vm->vm_map.max_offset);
  219         vm_map_unlock(&vm->vm_map);
  220 
  221         pmap_release(vmspace_pmap(vm));
  222         zfree(vmspace_zone, vm);
  223 }
  224 
  225 void
  226 vmspace_free(struct vmspace *vm)
  227 {
  228         if (vm->vm_refcnt == 0)
  229                 panic("vmspace_free: attempt to free already freed vmspace");
  230 
  231         if (--vm->vm_refcnt == 0 && vm->vm_exitingcnt == 0)
  232                 vmspace_dofree(vm);
  233 }
  234 
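/*
 *	A minimal sketch of the reference pairing (the caller shown here is
 *	hypothetical): code that shares an existing vmspace bumps vm_refcnt
 *	directly and pairs that with a later vmspace_free():
 *
 *		p2->p_vmspace = p1->p_vmspace;
 *		p1->p_vmspace->vm_refcnt++;
 *		...
 *		vmspace_free(p2->p_vmspace);
 *
 *	Whichever of vmspace_free() and vmspace_exitfree() drops both counts
 *	to zero ends up in vmspace_dofree() above.
 */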
  235 void
  236 vmspace_exitfree(struct proc *p)
  237 {
  238         struct vmspace *vm;
  239 
  240         vm = p->p_vmspace;
  241         p->p_vmspace = NULL;
  242 
  243         /*
  244          * cleanup by parent process wait()ing on exiting child.  vm_refcnt
  245          * may not be 0 (e.g. fork() and child exits without exec()ing).
  246          * exitingcnt may increment above 0 and drop back down to zero
  247          * several times while vm_refcnt is held non-zero.  vm_refcnt
  248          * may also increment above 0 and drop back down to zero several
  249          * times while vm_exitingcnt is held non-zero.
  250          *
  251          * The last wait on the exiting child's vmspace will clean up
  252          * the remainder of the vmspace.
  253          */
  254         if (--vm->vm_exitingcnt == 0 && vm->vm_refcnt == 0)
  255                 vmspace_dofree(vm);
  256 }
  257 
  258 /*
  259  * vmspace_swap_count() - count the approximate swap usage in pages for a
  260  *                        vmspace.
  261  *
  262  *      Swap usage is determined by taking the proportional swap used by
  263  *      VM objects backing the VM map.  To make up for fractional losses,
  264  *      if the VM object has any swap use at all, the associated map entries
  265  *      count for at least 1 swap page.
  266  */
  267 int
  268 vmspace_swap_count(struct vmspace *vmspace)
  269 {
  270         vm_map_t map = &vmspace->vm_map;
  271         vm_map_entry_t cur;
  272         int count = 0;
  273 
  274         for (cur = map->header.next; cur != &map->header; cur = cur->next) {
  275                 vm_object_t object;
  276 
  277                 if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
  278                     (object = cur->object.vm_object) != NULL &&
  279                     object->type == OBJT_SWAP
  280                 ) {
  281                         int n = (cur->end - cur->start) / PAGE_SIZE;
  282 
  283                         if (object->un_pager.swp.swp_bcount) {
  284                                 count += object->un_pager.swp.swp_bcount *
  285                                     SWAP_META_PAGES * n / object->size + 1;
  286                         }
  287                 }
  288         }
  289         return(count);
  290 }
  291 
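/*
 *	A worked example of the proportional charge computed above (numbers
 *	are hypothetical, and SWAP_META_PAGES is taken to be 16 purely for
 *	illustration): an object of 1024 pages with swp_bcount == 8 has about
 *	8 * 16 = 128 pages of swap assigned to it.  A map entry covering 256
 *	of those pages (n == 256) is charged
 *
 *		8 * 16 * 256 / 1024 + 1 = 33
 *
 *	swap pages, the trailing "+ 1" absorbing the fractional losses noted
 *	in the header comment.
 */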
  292 
  293 /*
  294  *      vm_map_create:
  295  *
  296  *      Creates and returns a new empty VM map with
  297  *      the given physical map structure, and having
  298  *      the given lower and upper address bounds.
  299  */
  300 vm_map_t
  301 vm_map_create(pmap, min, max)
  302         pmap_t pmap;
  303         vm_offset_t min, max;
  304 {
  305         vm_map_t result;
  306 
  307         result = zalloc(mapzone);
  308         vm_map_init(result, min, max);
  309         result->pmap = pmap;
  310         return (result);
  311 }
  312 
  313 /*
  314  * Initialize an existing vm_map structure
  315  * such as that in the vmspace structure.
  316  * The pmap is set elsewhere.
  317  */
  318 void
  319 vm_map_init(map, min, max)
  320         struct vm_map *map;
  321         vm_offset_t min, max;
  322 {
  323         map->header.next = map->header.prev = &map->header;
  324         map->nentries = 0;
  325         map->size = 0;
  326         map->system_map = 0;
  327         map->infork = 0;
  328         map->min_offset = min;
  329         map->max_offset = max;
  330         map->first_free = &map->header;
  331         map->hint = &map->header;
  332         map->timestamp = 0;
  333         lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
  334 }
  335 
  336 /*
  337  *      vm_map_entry_dispose:   [ internal use only ]
  338  *
  339  *      Inverse of vm_map_entry_create.
  340  */
  341 static void
  342 vm_map_entry_dispose(map, entry)
  343         vm_map_t map;
  344         vm_map_entry_t entry;
  345 {
  346         if (map->system_map || !mapentzone)
  347                 zfreei(kmapentzone, entry);
  348         else
  349                 zfree(mapentzone, entry);
  350 }
  351 
  352 /*
  353  *      vm_map_entry_create:    [ internal use only ]
  354  *
  355  *      Allocates a VM map entry for insertion.
  356  *      No entry fields are filled in.
  357  */
  358 static vm_map_entry_t
  359 vm_map_entry_create(map)
  360         vm_map_t map;
  361 {
  362         vm_map_entry_t new_entry;
  363 
  364         if (map->system_map || !mapentzone)
  365                 new_entry = zalloci(kmapentzone);
  366         else
  367                 new_entry = zalloc(mapentzone);
  368         if (new_entry == NULL)
  369             panic("vm_map_entry_create: kernel resources exhausted");
  370         return(new_entry);
  371 }
  372 
  373 /*
  374  *      vm_map_entry_{un,}link:
  375  *
  376  *      Insert/remove entries from maps.
  377  */
  378 static __inline void
  379 vm_map_entry_link(vm_map_t map,
  380                   vm_map_entry_t after_where,
  381                   vm_map_entry_t entry)
  382 {
  383         map->nentries++;
  384         entry->prev = after_where;
  385         entry->next = after_where->next;
  386         entry->next->prev = entry;
  387         after_where->next = entry;
  388 }
  389 
  390 static __inline void
  391 vm_map_entry_unlink(vm_map_t map,
  392                     vm_map_entry_t entry)
  393 {
  394         vm_map_entry_t prev;
  395         vm_map_entry_t next;
  396 
  397         if (entry->eflags & MAP_ENTRY_IN_TRANSITION)
  398                 panic("vm_map_entry_unlink: attempt to mess with locked entry! %p", entry);
  399         prev = entry->prev;
  400         next = entry->next;
  401         next->prev = prev;
  402         prev->next = next;
  403         map->nentries--;
  404 }
  405 
  406 /*
  407  *      SAVE_HINT:
  408  *
  409  *      Saves the specified entry as the hint for
  410  *      future lookups.
  411  */
  412 #define SAVE_HINT(map,value) \
  413                 (map)->hint = (value);
  414 
  415 /*
  416  *      vm_map_lookup_entry:    [ internal use only ]
  417  *
  418  *      Finds the map entry containing (or
  419  *      immediately preceding) the specified address
  420  *      in the given map; the entry is returned
  421  *      in the "entry" parameter.  The boolean
  422  *      result indicates whether the address is
  423  *      actually contained in the map.
  424  */
  425 boolean_t
  426 vm_map_lookup_entry(map, address, entry)
  427         vm_map_t map;
  428         vm_offset_t address;
  429         vm_map_entry_t *entry;  /* OUT */
  430 {
  431         vm_map_entry_t cur;
  432         vm_map_entry_t last;
  433 
  434         /*
  435          * Start looking either from the head of the list, or from the hint.
  436          */
  437 
  438         cur = map->hint;
  439 
  440         if (cur == &map->header)
  441                 cur = cur->next;
  442 
  443         if (address >= cur->start) {
  444                 /*
  445                  * Go from hint to end of list.
  446                  *
  447                  * But first, make a quick check to see if we are already looking
  448                  * at the entry we want (which is usually the case). Note also
  449                  * that we don't need to save the hint here... it is the same
  450                  * hint (unless we are at the header, in which case the hint
  451                  * didn't buy us anything anyway).
  452                  */
  453                 last = &map->header;
  454                 if ((cur != last) && (cur->end > address)) {
  455                         *entry = cur;
  456                         return (TRUE);
  457                 }
  458         } else {
  459                 /*
  460                  * Go from start to hint, *inclusively*
  461                  */
  462                 last = cur->next;
  463                 cur = map->header.next;
  464         }
  465 
  466         /*
  467          * Search linearly
  468          */
  469 
  470         while (cur != last) {
  471                 if (cur->end > address) {
  472                         if (address >= cur->start) {
  473                                 /*
  474                                  * Save this lookup for future hints, and
  475                                  * return
  476                                  */
  477 
  478                                 *entry = cur;
  479                                 SAVE_HINT(map, cur);
  480                                 return (TRUE);
  481                         }
  482                         break;
  483                 }
  484                 cur = cur->next;
  485         }
  486         *entry = cur->prev;
  487         SAVE_HINT(map, *entry);
  488         return (FALSE);
  489 }
  490 
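/*
 *	A minimal sketch of the usual calling convention, matching the idiom
 *	used by vm_map_submap(), vm_map_protect() and vm_map_madvise() below:
 *	on TRUE the returned entry contains "start" and may need clipping; on
 *	FALSE it is the predecessor, so callers step forward to the first
 *	entry at or beyond the range:
 *
 *		if (vm_map_lookup_entry(map, start, &entry))
 *			vm_map_clip_start(map, entry, start);
 *		else
 *			entry = entry->next;
 */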
  491 /*
  492  *      vm_map_insert:
  493  *
  494  *      Inserts the given whole VM object into the target
  495  *      map at the specified address range.  The object's
  496  *      size should match that of the address range.
  497  *
  498  *      Requires that the map be locked, and leaves it so.
  499  *
  500  *      If object is non-NULL, ref count must be bumped by caller
  501  *      prior to making call to account for the new entry.
  502  */
  503 int
  504 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
  505               vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
  506               int cow)
  507 {
  508         vm_map_entry_t new_entry;
  509         vm_map_entry_t prev_entry;
  510         vm_map_entry_t temp_entry;
  511         vm_eflags_t protoeflags;
  512 
  513         /*
  514          * Check that the start and end points are not bogus.
  515          */
  516 
  517         if ((start < map->min_offset) || (end > map->max_offset) ||
  518             (start >= end))
  519                 return (KERN_INVALID_ADDRESS);
  520 
  521         /*
  522          * Find the entry prior to the proposed starting address; if it's part
  523          * of an existing entry, this range is bogus.
  524          */
  525 
  526         if (vm_map_lookup_entry(map, start, &temp_entry))
  527                 return (KERN_NO_SPACE);
  528 
  529         prev_entry = temp_entry;
  530 
  531         /*
  532          * Assert that the next entry doesn't overlap the end point.
  533          */
  534 
  535         if ((prev_entry->next != &map->header) &&
  536             (prev_entry->next->start < end))
  537                 return (KERN_NO_SPACE);
  538 
  539         protoeflags = 0;
  540 
  541         if (cow & MAP_COPY_ON_WRITE)
  542                 protoeflags |= MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY;
  543 
  544         if (cow & MAP_NOFAULT) {
  545                 protoeflags |= MAP_ENTRY_NOFAULT;
  546 
  547                 KASSERT(object == NULL,
  548                         ("vm_map_insert: paradoxical MAP_NOFAULT request"));
  549         }
  550         if (cow & MAP_DISABLE_SYNCER)
  551                 protoeflags |= MAP_ENTRY_NOSYNC;
  552         if (cow & MAP_DISABLE_COREDUMP)
  553                 protoeflags |= MAP_ENTRY_NOCOREDUMP;
  554 
  555         if (object) {
  556                 /*
  557                  * When object is non-NULL, it could be shared with another
  558                  * process.  We have to set or clear OBJ_ONEMAPPING 
  559                  * appropriately.
  560                  */
  561                 if ((object->ref_count > 1) || (object->shadow_count != 0)) {
  562                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
  563                 }
  564         }
  565         else if ((prev_entry != &map->header) &&
  566                  (prev_entry->eflags == protoeflags) &&
  567                  (prev_entry->end == start) &&
  568                  (prev_entry->wired_count == 0) &&
  569                  ((prev_entry->object.vm_object == NULL) ||
  570                   vm_object_coalesce(prev_entry->object.vm_object,
  571                                      OFF_TO_IDX(prev_entry->offset),
  572                                      (vm_size_t)(prev_entry->end - prev_entry->start),
  573                                      (vm_size_t)(end - prev_entry->end)))) {
  574                 /*
  575                  * We were able to extend the object.  Determine if we
  576                  * can extend the previous map entry to include the 
  577                  * new range as well.
  578                  */
  579                 if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
  580                     (prev_entry->protection == prot) &&
  581                     (prev_entry->max_protection == max)) {
  582                         map->size += (end - prev_entry->end);
  583                         prev_entry->end = end;
  584                         vm_map_simplify_entry(map, prev_entry);
  585                         return (KERN_SUCCESS);
  586                 }
  587 
  588                 /*
  589                  * If we can extend the object but cannot extend the
  590                  * map entry, we have to create a new map entry.  We
  591                  * must bump the ref count on the extended object to
  592                  * account for it.  object may be NULL.
  593                  */
  594                 object = prev_entry->object.vm_object;
  595                 offset = prev_entry->offset +
  596                         (prev_entry->end - prev_entry->start);
  597                 vm_object_reference(object);
  598         }
  599 
  600         /*
  601          * NOTE: if conditionals fail, object can be NULL here.  This occurs
  602          * in things like the buffer map where we manage kva but do not manage
  603          * backing objects.
  604          */
  605 
  606         /*
  607          * Create a new entry
  608          */
  609 
  610         new_entry = vm_map_entry_create(map);
  611         new_entry->start = start;
  612         new_entry->end = end;
  613 
  614         new_entry->eflags = protoeflags;
  615         new_entry->object.vm_object = object;
  616         new_entry->offset = offset;
  617         new_entry->avail_ssize = 0;
  618 
  619         new_entry->inheritance = VM_INHERIT_DEFAULT;
  620         new_entry->protection = prot;
  621         new_entry->max_protection = max;
  622         new_entry->wired_count = 0;
  623 
  624         /*
  625          * Insert the new entry into the list
  626          */
  627 
  628         vm_map_entry_link(map, prev_entry, new_entry);
  629         map->size += new_entry->end - new_entry->start;
  630 
  631         /*
  632          * Update the free space hint
  633          */
  634         if ((map->first_free == prev_entry) &&
  635             (prev_entry->end >= new_entry->start)) {
  636                 map->first_free = new_entry;
  637         }
  638 
  639 #if 0
  640         /*
  641          * Temporarily removed to avoid MAP_STACK panic, due to
  642          * MAP_STACK being a huge hack.  Will be added back in
  643          * when MAP_STACK (and the user stack mapping) is fixed.
  644          */
  645         /*
  646          * It may be possible to simplify the entry
  647          */
  648         vm_map_simplify_entry(map, new_entry);
  649 #endif
  650 
  651         if (cow & (MAP_PREFAULT|MAP_PREFAULT_PARTIAL)) {
  652                 pmap_object_init_pt(map->pmap, start, prot,
  653                                     object, OFF_TO_IDX(offset), end - start,
  654                                     cow & MAP_PREFAULT_PARTIAL);
  655         }
  656 
  657         return (KERN_SUCCESS);
  658 }
  659 
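/*
 *	A minimal sketch of the reference-count convention stated in the
 *	header comment (only the vm_map_insert() arguments are real; "rv" is
 *	illustrative): a caller mapping an existing object takes its own
 *	reference before the call and drops it again if the insertion fails:
 *
 *		vm_object_reference(object);
 *		rv = vm_map_insert(map, object, offset, start, end,
 *				   prot, max, cow);
 *		if (rv != KERN_SUCCESS)
 *			vm_object_deallocate(object);
 *
 *	The map must already be locked around the call, as noted above.
 */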
  660 /*
  661  * Find sufficient space for `length' bytes in the given map, starting at
  662  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
  663  */
  664 int
  665 vm_map_findspace(map, start, length, addr)
  666         vm_map_t map;
  667         vm_offset_t start;
  668         vm_size_t length;
  669         vm_offset_t *addr;
  670 {
  671         vm_map_entry_t entry, next;
  672         vm_offset_t end;
  673 
  674         if (start < map->min_offset)
  675                 start = map->min_offset;
  676         if (start > map->max_offset)
  677                 return (1);
  678 
  679         /*
  680          * Look for the first possible address; if there's already something
  681          * at this address, we have to start after it.
  682          */
  683         if (start == map->min_offset) {
  684                 if ((entry = map->first_free) != &map->header)
  685                         start = entry->end;
  686         } else {
  687                 vm_map_entry_t tmp;
  688 
  689                 if (vm_map_lookup_entry(map, start, &tmp))
  690                         start = tmp->end;
  691                 entry = tmp;
  692         }
  693 
  694         /*
  695          * Look through the rest of the map, trying to fit a new region in the
  696          * gap between existing regions, or after the very last region.
  697          */
  698         for (;; start = (entry = next)->end) {
  699                 /*
  700                  * Find the end of the proposed new region.  Be sure we didn't
  701                  * go beyond the end of the map, or wrap around the address;
  702                  * if so, we lose.  Otherwise, if this is the last entry, or
  703                  * if the proposed new region fits before the next entry, we
  704                  * win.
  705                  */
  706                 end = start + length;
  707                 if (end > map->max_offset || end < start)
  708                         return (1);
  709                 next = entry->next;
  710                 if (next == &map->header || next->start >= end)
  711                         break;
  712         }
  713         SAVE_HINT(map, entry);
  714         *addr = start;
  715         if (map == kernel_map) {
  716                 vm_offset_t ksize;
  717                 if ((ksize = round_page(start + length)) > kernel_vm_end) {
  718                         pmap_growkernel(ksize);
  719                 }
  720         }
  721         return (0);
  722 }
  723 
  724 /*
  725  *      vm_map_find finds an unallocated region in the target address
  726  *      map with the given length.  The search is defined to be
  727  *      first-fit from the specified address; the region found is
  728  *      returned in the same parameter.
  729  *
  730  *      If object is non-NULL, ref count must be bumped by caller
  731  *      prior to making call to account for the new entry.
  732  */
  733 int
  734 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
  735             vm_offset_t *addr,  /* IN/OUT */
  736             vm_size_t length, boolean_t find_space, vm_prot_t prot,
  737             vm_prot_t max, int cow)
  738 {
  739         vm_offset_t start;
  740         int result, s = 0;
  741 
  742         start = *addr;
  743 
  744         if (map == kmem_map || map == mb_map)
  745                 s = splvm();
  746 
  747         vm_map_lock(map);
  748         if (find_space) {
  749                 if (vm_map_findspace(map, start, length, addr)) {
  750                         vm_map_unlock(map);
  751                         if (map == kmem_map || map == mb_map)
  752                                 splx(s);
  753                         return (KERN_NO_SPACE);
  754                 }
  755                 start = *addr;
  756         }
  757         result = vm_map_insert(map, object, offset,
  758                 start, start + length, prot, max, cow);
  759         vm_map_unlock(map);
  760 
  761         if (map == kmem_map || map == mb_map)
  762                 splx(s);
  763 
  764         return (result);
  765 }
  766 
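/*
 *	A minimal usage sketch (values are hypothetical): letting
 *	vm_map_find() pick an address at or after a hint for an anonymous
 *	mapping:
 *
 *		vm_offset_t addr = hint_addr;
 *
 *		if (vm_map_find(map, NULL, 0, &addr, size, TRUE,
 *				VM_PROT_ALL, VM_PROT_ALL, 0) == KERN_SUCCESS)
 *			... addr now holds the start of the new range ...
 *
 *	With find_space == FALSE the insertion is attempted at *addr exactly,
 *	and KERN_NO_SPACE is returned if that range is already occupied.
 */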
  767 /*
  768  *      vm_map_simplify_entry:
  769  *
  770  *      Simplify the given map entry by merging with either neighbor.  This
  771  *      routine also has the ability to merge with both neighbors.
  772  *
  773  *      The map must be locked.
  774  *
  775  *      This routine guarantees that the passed entry remains valid (though
  776  *      possibly extended).  When merging, this routine may delete one or
  777  *      both neighbors.  No action is taken on entries which have their
  778  *      in-transition flag set.
  779  */
  780 void
  781 vm_map_simplify_entry(map, entry)
  782         vm_map_t map;
  783         vm_map_entry_t entry;
  784 {
  785         vm_map_entry_t next, prev;
  786         vm_size_t prevsize, esize;
  787 
  788         if (entry->eflags & (MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP)) {
  789                 ++cnt.v_intrans_coll;
  790                 return;
  791         }
  792 
  793         prev = entry->prev;
  794         if (prev != &map->header) {
  795                 prevsize = prev->end - prev->start;
  796                 if ( (prev->end == entry->start) &&
  797                      (prev->object.vm_object == entry->object.vm_object) &&
  798                      (!prev->object.vm_object ||
  799                         (prev->offset + prevsize == entry->offset)) &&
  800                      (prev->eflags == entry->eflags) &&
  801                      (prev->protection == entry->protection) &&
  802                      (prev->max_protection == entry->max_protection) &&
  803                      (prev->inheritance == entry->inheritance) &&
  804                      (prev->wired_count == entry->wired_count)) {
  805                         if (map->first_free == prev)
  806                                 map->first_free = entry;
  807                         if (map->hint == prev)
  808                                 map->hint = entry;
  809                         vm_map_entry_unlink(map, prev);
  810                         entry->start = prev->start;
  811                         entry->offset = prev->offset;
  812                         if (prev->object.vm_object)
  813                                 vm_object_deallocate(prev->object.vm_object);
  814                         vm_map_entry_dispose(map, prev);
  815                 }
  816         }
  817 
  818         next = entry->next;
  819         if (next != &map->header) {
  820                 esize = entry->end - entry->start;
  821                 if ((entry->end == next->start) &&
  822                     (next->object.vm_object == entry->object.vm_object) &&
  823                      (!entry->object.vm_object ||
  824                         (entry->offset + esize == next->offset)) &&
  825                     (next->eflags == entry->eflags) &&
  826                     (next->protection == entry->protection) &&
  827                     (next->max_protection == entry->max_protection) &&
  828                     (next->inheritance == entry->inheritance) &&
  829                     (next->wired_count == entry->wired_count)) {
  830                         if (map->first_free == next)
  831                                 map->first_free = entry;
  832                         if (map->hint == next)
  833                                 map->hint = entry;
  834                         vm_map_entry_unlink(map, next);
  835                         entry->end = next->end;
  836                         if (next->object.vm_object)
  837                                 vm_object_deallocate(next->object.vm_object);
  838                         vm_map_entry_dispose(map, next);
  839                 }
  840         }
  841 }
  842 /*
  843  *      vm_map_clip_start:      [ internal use only ]
  844  *
  845  *      Asserts that the given entry begins at or after
  846  *      the specified address; if necessary,
  847  *      it splits the entry into two.
  848  */
  849 #define vm_map_clip_start(map, entry, startaddr) \
  850 { \
  851         if (startaddr > entry->start) \
  852                 _vm_map_clip_start(map, entry, startaddr); \
  853 }
  854 
  855 /*
  856  *      This routine is called only when it is known that
  857  *      the entry must be split.
  858  */
  859 static void
  860 _vm_map_clip_start(map, entry, start)
  861         vm_map_t map;
  862         vm_map_entry_t entry;
  863         vm_offset_t start;
  864 {
  865         vm_map_entry_t new_entry;
  866 
  867         /*
  868          * Split off the front portion -- note that we must insert the new
  869          * entry BEFORE this one, so that this entry has the specified
  870          * starting address.
  871          */
  872 
  873         vm_map_simplify_entry(map, entry);
  874 
  875         /*
  876          * If there is no object backing this entry, we might as well create
  877          * one now.  If we defer it, an object can get created after the map
  878          * is clipped, and individual objects will be created for the split-up
  879          * map.  This is a bit of a hack, but is also about the best place to
  880          * put this improvement.
  881          */
  882 
  883         if (entry->object.vm_object == NULL && !map->system_map) {
  884                 vm_object_t object;
  885                 object = vm_object_allocate(OBJT_DEFAULT,
  886                                 atop(entry->end - entry->start));
  887                 entry->object.vm_object = object;
  888                 entry->offset = 0;
  889         }
  890 
  891         new_entry = vm_map_entry_create(map);
  892         *new_entry = *entry;
  893 
  894         new_entry->end = start;
  895         entry->offset += (start - entry->start);
  896         entry->start = start;
  897 
  898         vm_map_entry_link(map, entry->prev, new_entry);
  899 
  900         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
  901                 vm_object_reference(new_entry->object.vm_object);
  902         }
  903 }
  904 
  905 /*
  906  *      vm_map_clip_end:        [ internal use only ]
  907  *
  908  *      Asserts that the given entry ends at or before
  909  *      the specified address; if necessary,
  910  *      it splits the entry into two.
  911  */
  912 
  913 #define vm_map_clip_end(map, entry, endaddr) \
  914 { \
  915         if (endaddr < entry->end) \
  916                 _vm_map_clip_end(map, entry, endaddr); \
  917 }
  918 
  919 /*
  920  *      This routine is called only when it is known that
  921  *      the entry must be split.
  922  */
  923 static void
  924 _vm_map_clip_end(map, entry, end)
  925         vm_map_t map;
  926         vm_map_entry_t entry;
  927         vm_offset_t end;
  928 {
  929         vm_map_entry_t new_entry;
  930 
  931         /*
  932          * If there is no object backing this entry, we might as well create
  933          * one now.  If we defer it, an object can get created after the map
  934          * is clipped, and individual objects will be created for the split-up
  935          * map.  This is a bit of a hack, but is also about the best place to
  936          * put this improvement.
  937          */
  938 
  939         if (entry->object.vm_object == NULL && !map->system_map) {
  940                 vm_object_t object;
  941                 object = vm_object_allocate(OBJT_DEFAULT,
  942                                 atop(entry->end - entry->start));
  943                 entry->object.vm_object = object;
  944                 entry->offset = 0;
  945         }
  946 
  947         /*
  948          * Create a new entry and insert it AFTER the specified entry
  949          */
  950 
  951         new_entry = vm_map_entry_create(map);
  952         *new_entry = *entry;
  953 
  954         new_entry->start = entry->end = end;
  955         new_entry->offset += (end - entry->start);
  956 
  957         vm_map_entry_link(map, entry, new_entry);
  958 
  959         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
  960                 vm_object_reference(new_entry->object.vm_object);
  961         }
  962 }
  963 
  964 /*
  965  *      VM_MAP_RANGE_CHECK:     [ internal use only ]
  966  *
  967  *      Asserts that the starting and ending region
  968  *      addresses fall within the valid range of the map.
  969  */
  970 #define VM_MAP_RANGE_CHECK(map, start, end)             \
  971                 {                                       \
  972                 if (start < vm_map_min(map))            \
  973                         start = vm_map_min(map);        \
  974                 if (end > vm_map_max(map))              \
  975                         end = vm_map_max(map);          \
  976                 if (start > end)                        \
  977                         start = end;                    \
  978                 }
  979 
  980 /*
  981  *      vm_map_transition_wait: [ kernel use only ]
  982  *
  983  *      Used to block when an in-transition collision occurs.  The map
  984  *      is unlocked for the sleep and relocked before the return.
  985  */
  986 static
  987 void
  988 vm_map_transition_wait(vm_map_t map)
  989 {
  990         vm_map_unlock(map);
  991         tsleep(map, PVM, "vment", 0);
  992         vm_map_lock(map);
  993 }
  994 
  995 /*
  996  * CLIP_CHECK_BACK
  997  * CLIP_CHECK_FWD
  998  *
  999  *      When we do blocking operations with the map lock held it is
 1000  *      possible that a clip might have occurred on our in-transit entry,
 1001  *      requiring an adjustment to the entry in our loop.  These macros
 1002  *      help the pageable and clip_range code deal with the case.  The
 1003  *      conditional costs virtually nothing if no clipping has occurred.
 1004  */
 1005 
 1006 #define CLIP_CHECK_BACK(entry, save_start)              \
 1007     do {                                                \
 1008             while (entry->start != save_start) {        \
 1009                     entry = entry->prev;                \
 1010                     KASSERT(entry != &map->header, ("bad entry clip")); \
 1011             }                                           \
 1012     } while(0)
 1013 
 1014 #define CLIP_CHECK_FWD(entry, save_end)                 \
 1015     do {                                                \
 1016             while (entry->end != save_end) {            \
 1017                     entry = entry->next;                \
 1018                     KASSERT(entry != &map->header, ("bad entry clip")); \
 1019             }                                           \
 1020     } while(0)
 1021 
 1022 
 1023 /*
 1024  *      vm_map_clip_range:      [ kernel use only ]
 1025  *
 1026  *      Clip the specified range and return the base entry.  The
 1027  *      range may cover several entries starting at the returned base
 1028  *      and the first and last entry in the covering sequence will be
 1029  *      properly clipped to the requested start and end address.
 1030  *
 1031  *      If no holes are allowed, you should pass the MAP_CLIP_NO_HOLES
 1032  *      flag.  
 1033  *
 1034  *      The MAP_ENTRY_IN_TRANSITION flag will be set for the entries
 1035  *      covered by the requested range.
 1036  *
 1037  *      The map must be exclusively locked on entry and will remain locked
 1038  *      on return. If no range exists or the range contains holes and you
 1039  *      specified that no holes were allowed, NULL will be returned.  This
 1040  *      routine may temporarily unlock the map in order to avoid a deadlock when
 1041  *      sleeping.
 1042  */
 1043 static
 1044 vm_map_entry_t
 1045 vm_map_clip_range(vm_map_t map, vm_offset_t start, vm_offset_t end, int flags)
 1046 {
 1047         vm_map_entry_t start_entry;
 1048         vm_map_entry_t entry;
 1049 
 1050         /*
 1051          * Locate the entry and effect initial clipping.  The in-transition
 1052          * case does not occur very often so do not try to optimize it.
 1053          */
 1054 again:
 1055         if (vm_map_lookup_entry(map, start, &start_entry) == FALSE)
 1056                 return (NULL);
 1057         entry = start_entry;
 1058         if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
 1059                 entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 1060                 ++cnt.v_intrans_coll;
 1061                 ++cnt.v_intrans_wait;
 1062                 vm_map_transition_wait(map);
 1063                 /*
 1064                  * entry and/or start_entry may have been clipped while
 1065                  * we slept, or may have gone away entirely.  We have
 1066                  * to restart from the lookup.
 1067                  */
 1068                 goto again;
 1069         }
 1070         /*
 1071          * Since we hold an exclusive map lock we do not have to restart
 1072          * after clipping, even though clipping may block in zalloc.
 1073          */
 1074         vm_map_clip_start(map, entry, start);
 1075         vm_map_clip_end(map, entry, end);
 1076         entry->eflags |= MAP_ENTRY_IN_TRANSITION;
 1077 
 1078         /*
 1079          * Scan entries covered by the range.  When working on the next
 1080          * entry a restart need only re-loop on the current entry which
 1081          * we have already locked, since 'next' may have changed.  Also,
 1082          * even though entry is safe, it may have been clipped so we
 1083          * have to iterate forwards through the clip after sleeping.
 1084          */
 1085         while (entry->next != &map->header && entry->next->start < end) {
 1086                 vm_map_entry_t next = entry->next;
 1087 
 1088                 if (flags & MAP_CLIP_NO_HOLES) {
 1089                         if (next->start > entry->end) {
 1090                                 vm_map_unclip_range(map, start_entry,
 1091                                         start, entry->end, flags);
 1092                                 return(NULL);
 1093                         }
 1094                 }
 1095 
 1096                 if (next->eflags & MAP_ENTRY_IN_TRANSITION) {
 1097                         vm_offset_t save_end = entry->end;
 1098                         next->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 1099                         ++cnt.v_intrans_coll;
 1100                         ++cnt.v_intrans_wait;
 1101                         vm_map_transition_wait(map);
 1102 
 1103                         /*
 1104                          * clips might have occurred while we blocked.
 1105                          */
 1106                         CLIP_CHECK_FWD(entry, save_end);
 1107                         CLIP_CHECK_BACK(start_entry, start);
 1108                         continue;
 1109                 }
 1110                 /*
 1111                  * No restart necessary even though clip_end may block, we
 1112                  * are holding the map lock.
 1113                  */
 1114                 vm_map_clip_end(map, next, end);
 1115                 next->eflags |= MAP_ENTRY_IN_TRANSITION;
 1116                 entry = next;
 1117         }
 1118         if (flags & MAP_CLIP_NO_HOLES) {
 1119                 if (entry->end != end) {
 1120                         vm_map_unclip_range(map, start_entry,
 1121                                 start, entry->end, flags);
 1122                         return(NULL);
 1123                 }
 1124         }
 1125         return(start_entry);
 1126 }
 1127 
 1128 /*
 1129  *      vm_map_unclip_range:    [ kernel use only ]
 1130  *
 1131  *      Undo the effect of vm_map_clip_range().  You should pass the same
 1132  *      flags and the same range that you passed to vm_map_clip_range().
 1133  *      This code will clear the in-transition flag on the entries and
 1134  *      wake up anyone waiting.  This code will also simplify the sequence 
 1135  *      and attempt to merge it with entries before and after the sequence.
 1136  *
 1137  *      The map must be locked on entry and will remain locked on return.
 1138  *
 1139  *      Note that you should also pass the start_entry returned by 
 1140  *      vm_map_clip_range().  However, if you block between the two calls
 1141  *      with the map unlocked, please be aware that the start_entry may
 1142  *      have been clipped and you may need to scan it backwards to find
 1143  *      the entry corresponding with the original start address.  You are
 1144  *      responsible for this; vm_map_unclip_range() expects the correct
 1145  *      start_entry to be passed to it and will KASSERT otherwise.
 1146  */
 1147 static
 1148 void
 1149 vm_map_unclip_range(
 1150         vm_map_t map,
 1151         vm_map_entry_t start_entry,
 1152         vm_offset_t start,
 1153         vm_offset_t end,
 1154         int flags)
 1155 {
 1156         vm_map_entry_t entry;
 1157 
 1158         entry = start_entry;
 1159 
 1160         KASSERT(entry->start == start, ("unclip_range: illegal base entry"));
 1161         while (entry != &map->header && entry->start < end) {
 1162                 KASSERT(entry->eflags & MAP_ENTRY_IN_TRANSITION, ("in-transition flag not set during unclip on: %p", entry));
 1163                 KASSERT(entry->end <= end, ("unclip_range: tail wasn't clipped"));
 1164                 entry->eflags &= ~MAP_ENTRY_IN_TRANSITION;
 1165                 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) {
 1166                         entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP;
 1167                         wakeup(map);
 1168                 }
 1169                 entry = entry->next;
 1170         }
 1171 
 1172         /*
 1173          * Simplification does not block so there is no restart case.
 1174          */
 1175         entry = start_entry;
 1176         while (entry != &map->header && entry->start < end) {
 1177                 vm_map_simplify_entry(map, entry);
 1178                 entry = entry->next;
 1179         }
 1180 }
 1181 
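/*
 *	A minimal sketch of how vm_map_clip_range() and vm_map_unclip_range()
 *	pair up for an operation that may block while walking a range (the
 *	operation itself and the variable names are illustrative):
 *
 *		base = vm_map_clip_range(map, start, end, MAP_CLIP_NO_HOLES);
 *		if (base == NULL)
 *			return (KERN_INVALID_ADDRESS);
 *		... operate on the entries from base up to end; the
 *		    in-transition flag keeps other map users from unlinking
 *		    them even if this code sleeps ...
 *		CLIP_CHECK_BACK(base, start);
 *		vm_map_unclip_range(map, base, start, end, MAP_CLIP_NO_HOLES);
 *
 *	CLIP_CHECK_BACK() recovers the correct base entry if it was clipped
 *	while the map was unlocked, as required by the comment above.
 */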
 1182 /*
 1183  *      vm_map_submap:          [ kernel use only ]
 1184  *
 1185  *      Mark the given range as handled by a subordinate map.
 1186  *
 1187  *      This range must have been created with vm_map_find,
 1188  *      and no other operations may have been performed on this
 1189  *      range prior to calling vm_map_submap.
 1190  *
 1191  *      Only a limited number of operations can be performed
 1192  *      within this range after calling vm_map_submap:
 1193  *              vm_fault
 1194  *      [Don't try vm_map_copy!]
 1195  *
 1196  *      To remove a submapping, one must first remove the
 1197  *      range from the superior map, and then destroy the
 1198  *      submap (if desired).  [Better yet, don't try it.]
 1199  */
 1200 int
 1201 vm_map_submap(map, start, end, submap)
 1202         vm_map_t map;
 1203         vm_offset_t start;
 1204         vm_offset_t end;
 1205         vm_map_t submap;
 1206 {
 1207         vm_map_entry_t entry;
 1208         int result = KERN_INVALID_ARGUMENT;
 1209 
 1210         vm_map_lock(map);
 1211 
 1212         VM_MAP_RANGE_CHECK(map, start, end);
 1213 
 1214         if (vm_map_lookup_entry(map, start, &entry)) {
 1215                 vm_map_clip_start(map, entry, start);
 1216         } else {
 1217                 entry = entry->next;
 1218         }
 1219 
 1220         vm_map_clip_end(map, entry, end);
 1221 
 1222         if ((entry->start == start) && (entry->end == end) &&
 1223             ((entry->eflags & MAP_ENTRY_COW) == 0) &&
 1224             (entry->object.vm_object == NULL)) {
 1225                 entry->object.sub_map = submap;
 1226                 entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
 1227                 result = KERN_SUCCESS;
 1228         }
 1229         vm_map_unlock(map);
 1230 
 1231         return (result);
 1232 }
 1233 
 1234 /*
 1235  *      vm_map_protect:
 1236  *
 1237  *      Sets the protection of the specified address
 1238  *      region in the target map.  If "set_max" is
 1239  *      specified, the maximum protection is to be set;
 1240  *      otherwise, only the current protection is affected.
 1241  */
 1242 int
 1243 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
 1244                vm_prot_t new_prot, boolean_t set_max)
 1245 {
 1246         vm_map_entry_t current;
 1247         vm_map_entry_t entry;
 1248 
 1249         vm_map_lock(map);
 1250 
 1251         VM_MAP_RANGE_CHECK(map, start, end);
 1252 
 1253         if (vm_map_lookup_entry(map, start, &entry)) {
 1254                 vm_map_clip_start(map, entry, start);
 1255         } else {
 1256                 entry = entry->next;
 1257         }
 1258 
 1259         /*
 1260          * Make a first pass to check for protection violations.
 1261          */
 1262 
 1263         current = entry;
 1264         while ((current != &map->header) && (current->start < end)) {
 1265                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
 1266                         vm_map_unlock(map);
 1267                         return (KERN_INVALID_ARGUMENT);
 1268                 }
 1269                 if ((new_prot & current->max_protection) != new_prot) {
 1270                         vm_map_unlock(map);
 1271                         return (KERN_PROTECTION_FAILURE);
 1272                 }
 1273                 current = current->next;
 1274         }
 1275 
 1276         /*
 1277          * Go back and fix up protections. [Note that clipping is not
 1278          * necessary the second time.]
 1279          */
 1280 
 1281         current = entry;
 1282 
 1283         while ((current != &map->header) && (current->start < end)) {
 1284                 vm_prot_t old_prot;
 1285 
 1286                 vm_map_clip_end(map, current, end);
 1287 
 1288                 old_prot = current->protection;
 1289                 if (set_max)
 1290                         current->protection =
 1291                             (current->max_protection = new_prot) &
 1292                             old_prot;
 1293                 else
 1294                         current->protection = new_prot;
 1295 
 1296                 /*
 1297                  * Update physical map if necessary. Worry about copy-on-write
 1298                  * here -- CHECK THIS XXX
 1299                  */
 1300 
 1301                 if (current->protection != old_prot) {
 1302 #define MASK(entry)     (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
 1303                                                         VM_PROT_ALL)
 1304 
 1305                         pmap_protect(map->pmap, current->start,
 1306                             current->end,
 1307                             current->protection & MASK(current));
 1308 #undef  MASK
 1309                 }
 1310 
 1311                 vm_map_simplify_entry(map, current);
 1312 
 1313                 current = current->next;
 1314         }
 1315 
 1316         vm_map_unlock(map);
 1317         return (KERN_SUCCESS);
 1318 }
 1319 
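/*
 *	A minimal sketch of a caller (an mprotect()-style path; the variables
 *	are hypothetical) making a range read-only without touching the
 *	maximum protection:
 *
 *		rv = vm_map_protect(&p->p_vmspace->vm_map, addr,
 *				    addr + size, VM_PROT_READ, FALSE);
 *		if (rv == KERN_PROTECTION_FAILURE)
 *			return (EACCES);
 *
 *	Passing set_max == TRUE instead sets max_protection to the new value
 *	and reduces the current protection to its intersection with the old
 *	one, as the second pass above shows.
 */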
 1320 /*
 1321  *      vm_map_madvise:
 1322  *
 1323  *      This routine traverses a process's map handling the madvise
 1324  *      system call.  Advisories are classified as either those affecting
 1325  *      the vm_map_entry structure, or those affecting the underlying
 1326  *      objects.
 1327  */
 1328 
 1329 int
 1330 vm_map_madvise(map, start, end, behav)
 1331         vm_map_t map;
 1332         vm_offset_t start, end;
 1333         int behav;
 1334 {
 1335         vm_map_entry_t current, entry;
 1336         int modify_map = 0;
 1337 
 1338         /*
 1339          * Some madvise calls directly modify the vm_map_entry, in which case
 1340          * we need to use an exclusive lock on the map and we need to perform 
 1341          * various clipping operations.  Otherwise we only need a read-lock
 1342          * on the map.
 1343          */
 1344 
 1345         switch(behav) {
 1346         case MADV_NORMAL:
 1347         case MADV_SEQUENTIAL:
 1348         case MADV_RANDOM:
 1349         case MADV_NOSYNC:
 1350         case MADV_AUTOSYNC:
 1351         case MADV_NOCORE:
 1352         case MADV_CORE:
 1353                 modify_map = 1;
 1354                 vm_map_lock(map);
 1355                 break;
 1356         case MADV_WILLNEED:
 1357         case MADV_DONTNEED:
 1358         case MADV_FREE:
 1359                 vm_map_lock_read(map);
 1360                 break;
 1361         default:
 1362                 return (KERN_INVALID_ARGUMENT);
 1363         }
 1364 
 1365         /*
 1366          * Locate starting entry and clip if necessary.
 1367          */
 1368 
 1369         VM_MAP_RANGE_CHECK(map, start, end);
 1370 
 1371         if (vm_map_lookup_entry(map, start, &entry)) {
 1372                 if (modify_map)
 1373                         vm_map_clip_start(map, entry, start);
 1374         } else {
 1375                 entry = entry->next;
 1376         }
 1377 
 1378         if (modify_map) {
 1379                 /*
 1380                  * madvise behaviors that are implemented in the vm_map_entry.
 1381                  *
 1382                  * We clip the vm_map_entry so that behavioral changes are
 1383                  * limited to the specified address range.
 1384                  */
 1385                 for (current = entry;
 1386                      (current != &map->header) && (current->start < end);
 1387                      current = current->next
 1388                 ) {
 1389                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
 1390                                 continue;
 1391 
 1392                         vm_map_clip_end(map, current, end);
 1393 
 1394                         switch (behav) {
 1395                         case MADV_NORMAL:
 1396                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL);
 1397                                 break;
 1398                         case MADV_SEQUENTIAL:
 1399                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL);
 1400                                 break;
 1401                         case MADV_RANDOM:
 1402                                 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM);
 1403                                 break;
 1404                         case MADV_NOSYNC:
 1405                                 current->eflags |= MAP_ENTRY_NOSYNC;
 1406                                 break;
 1407                         case MADV_AUTOSYNC:
 1408                                 current->eflags &= ~MAP_ENTRY_NOSYNC;
 1409                                 break;
 1410                         case MADV_NOCORE:
 1411                                 current->eflags |= MAP_ENTRY_NOCOREDUMP;
 1412                                 break;
 1413                         case MADV_CORE:
 1414                                 current->eflags &= ~MAP_ENTRY_NOCOREDUMP;
 1415                                 break;
 1416                         default:
 1417                                 break;
 1418                         }
 1419                         vm_map_simplify_entry(map, current);
 1420                 }
 1421                 vm_map_unlock(map);
 1422         } else {
 1423                 vm_pindex_t pindex;
 1424                 int count;
 1425 
 1426                 /*
 1427                  * madvise behaviors that are implemented in the underlying
 1428                  * vm_object.
 1429                  *
 1430                  * Since we don't clip the vm_map_entry, we have to clip
 1431                  * the vm_object pindex and count.
 1432                  */
 1433                 for (current = entry;
 1434                      (current != &map->header) && (current->start < end);
 1435                      current = current->next
 1436                 ) {
 1437                         vm_offset_t useStart;
 1438 
 1439                         if (current->eflags & MAP_ENTRY_IS_SUB_MAP)
 1440                                 continue;
 1441 
 1442                         pindex = OFF_TO_IDX(current->offset);
 1443                         count = atop(current->end - current->start);
 1444                         useStart = current->start;
 1445 
 1446                         if (current->start < start) {
 1447                                 pindex += atop(start - current->start);
 1448                                 count -= atop(start - current->start);
 1449                                 useStart = start;
 1450                         }
 1451                         if (current->end > end)
 1452                                 count -= atop(current->end - end);
 1453 
 1454                         if (count <= 0)
 1455                                 continue;
 1456 
 1457                         vm_object_madvise(current->object.vm_object,
 1458                                           pindex, count, behav);
 1459                         if (behav == MADV_WILLNEED) {
 1460                                 pmap_object_init_pt(
 1461                                     map->pmap, 
 1462                                     useStart,
 1463                                     current->protection,
 1464                                     current->object.vm_object,
 1465                                     pindex, 
 1466                                     (count << PAGE_SHIFT),
 1467                                     MAP_PREFAULT_MADVISE
 1468                                 );
 1469                         }
 1470                 }
 1471                 vm_map_unlock_read(map);
 1472         }
 1473         return(0);
 1474 }       
 1475 
 1476 
 1477 /*
 1478  *      vm_map_inherit:
 1479  *
 1480  *      Sets the inheritance of the specified address
 1481  *      range in the target map.  Inheritance
 1482  *      affects how the map will be shared with
 1483  *      child maps at the time of vm_map_fork.
 1484  */
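      /*
       * Illustrative sketch (assumed caller, not part of this file): the
       * minherit() system call passes one of the VM_INHERIT_* values for a
       * page-aligned range, e.g.:
       *
       *      if (vm_map_inherit(&p->p_vmspace->vm_map, trunc_page(addr),
       *          round_page(addr + size), VM_INHERIT_SHARE) != KERN_SUCCESS)
       *              return (EINVAL);
       *      return (0);
       */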
 1485 int
 1486 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
 1487                vm_inherit_t new_inheritance)
 1488 {
 1489         vm_map_entry_t entry;
 1490         vm_map_entry_t temp_entry;
 1491 
 1492         switch (new_inheritance) {
 1493         case VM_INHERIT_NONE:
 1494         case VM_INHERIT_COPY:
 1495         case VM_INHERIT_SHARE:
 1496                 break;
 1497         default:
 1498                 return (KERN_INVALID_ARGUMENT);
 1499         }
 1500 
 1501         vm_map_lock(map);
 1502 
 1503         VM_MAP_RANGE_CHECK(map, start, end);
 1504 
 1505         if (vm_map_lookup_entry(map, start, &temp_entry)) {
 1506                 entry = temp_entry;
 1507                 vm_map_clip_start(map, entry, start);
 1508         } else
 1509                 entry = temp_entry->next;
 1510 
 1511         while ((entry != &map->header) && (entry->start < end)) {
 1512                 vm_map_clip_end(map, entry, end);
 1513 
 1514                 entry->inheritance = new_inheritance;
 1515 
 1516                 vm_map_simplify_entry(map, entry);
 1517 
 1518                 entry = entry->next;
 1519         }
 1520 
 1521         vm_map_unlock(map);
 1522         return (KERN_SUCCESS);
 1523 }
 1524 
 1525 /*
 1526  * Implement the semantics of mlock
 1527  */
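      /*
       * Illustrative sketch (assumed callers): mlock() wires a page-aligned
       * range by passing new_pageable == FALSE, and munlock() unwires it by
       * passing TRUE, e.g.:
       *
       *      rv = vm_map_user_pageable(&p->p_vmspace->vm_map,
       *          trunc_page(addr), round_page(addr + size), FALSE);
       *      return (rv == KERN_SUCCESS ? 0 : ENOMEM);
       */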
 1528 int
 1529 vm_map_user_pageable(map, start, real_end, new_pageable)
 1530         vm_map_t map;
 1531         vm_offset_t start;
 1532         vm_offset_t real_end;
 1533         boolean_t new_pageable;
 1534 {
 1535         vm_map_entry_t entry;
 1536         vm_map_entry_t start_entry;
 1537         vm_offset_t end;
 1538         boolean_t fictitious;
 1539         int rv = KERN_SUCCESS;
 1540 
 1541         vm_map_lock(map);
 1542         VM_MAP_RANGE_CHECK(map, start, real_end);
 1543         end = real_end;
 1544 
 1545         start_entry = vm_map_clip_range(map, start, end, MAP_CLIP_NO_HOLES);
 1546         if (start_entry == NULL) {
 1547                 vm_map_unlock(map);
 1548                 return (KERN_INVALID_ADDRESS);
 1549         }
 1550 
 1551         if (new_pageable == 0) {
 1552                 entry = start_entry;
 1553                 while ((entry != &map->header) && (entry->start < end)) {
 1554                         vm_offset_t save_start;
 1555                         vm_offset_t save_end;
 1556 
 1557                         /*
 1558                          * Already user wired or hard wired (trivial cases)
 1559                          */
 1560                         if (entry->eflags & MAP_ENTRY_USER_WIRED) {
 1561                                 entry = entry->next;
 1562                                 continue;
 1563                         }
 1564                         if (entry->wired_count != 0) {
 1565                                 entry->wired_count++;
 1566                                 entry->eflags |= MAP_ENTRY_USER_WIRED;
 1567                                 entry = entry->next;
 1568                                 continue;
 1569                         }
 1570 
 1571                         /*
 1572                          * A new wiring requires instantiation of appropriate
 1573                          * management structures and the faulting in of the
 1574                          * page.
 1575                          */
 1576                         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 1577                                 int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
 1578                                 if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {
 1579 
 1580                                         vm_object_shadow(&entry->object.vm_object,
 1581                                             &entry->offset,
 1582                                             atop(entry->end - entry->start));
 1583                                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 1584 
 1585                                 } else if (entry->object.vm_object == NULL &&
 1586                                            !map->system_map) {
 1587 
 1588                                         entry->object.vm_object =
 1589                                             vm_object_allocate(OBJT_DEFAULT,
 1590                                                 atop(entry->end - entry->start));
 1591                                         entry->offset = (vm_offset_t) 0;
 1592 
 1593                                 }
 1594                         }
 1595                         entry->wired_count++;
 1596                         entry->eflags |= MAP_ENTRY_USER_WIRED;
 1597 
 1598                         /*
 1599                          * Now fault in the area.  The map lock needs to be
 1600                          * manipulated to avoid deadlocks.  The in-transition
 1601                          * flag protects the entries. 
 1602                          */
 1603                         fictitious = entry->object.vm_object != NULL &&
 1604                             entry->object.vm_object->type == OBJT_DEVICE;
 1605                         save_start = entry->start;
 1606                         save_end = entry->end;
 1607                         vm_map_unlock(map);
 1608                         map->timestamp++;
 1609                         rv = vm_fault_wire(map, save_start, save_end, 1, fictitious);
 1610                         vm_map_lock(map);
 1611                         if (rv) {
 1612                                 CLIP_CHECK_BACK(entry, save_start);
 1613                                 for (;;) {
 1614                                         KASSERT(entry->wired_count == 1, ("bad wired_count on entry"));
 1615                                         entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 1616                                         entry->wired_count = 0;
 1617                                         if (entry->end == save_end)
 1618                                                 break;
 1619                                         entry = entry->next;
 1620                                         KASSERT(entry != &map->header, ("bad entry clip during backout"));
 1621                                 }
 1622                                 end = save_start;       /* unwire the rest */
 1623                                 break;
 1624                         }
 1625                         /*
 1626                          * note that even though the entry might have been
 1627                          * clipped, the USER_WIRED flag we set prevents
 1628                          * duplication so we do not have to do a 
 1629                          * clip check.
 1630                          */
 1631                         entry = entry->next;
 1632                 }
 1633 
 1634                 /*
 1635                  * If we failed fall through to the unwiring section to
 1636                  * unwire what we had wired so far.  'end' has already
 1637                  * been adjusted.
 1638                  */
 1639                 if (rv)
 1640                         new_pageable = 1;
 1641 
 1642                 /*
 1643                  * start_entry might have been clipped if we unlocked the
 1644                  * map and blocked.  No matter how clipped it has gotten
 1645                  * there should be a fragment that is on our start boundary.
 1646                  */
 1647                 CLIP_CHECK_BACK(start_entry, start);
 1648         }
 1649 
 1650         /*
 1651          * Deal with the unwiring case.
 1652          */
 1653         if (new_pageable) {
 1654                 /*
 1655                  * This is the unwiring case.  We must first ensure that the
 1656                  * range to be unwired is really wired down.  We know there
 1657                  * are no holes.
 1658                  */
 1659                 entry = start_entry;
 1660                 while ((entry != &map->header) && (entry->start < end)) {
 1661                         if ((entry->eflags & MAP_ENTRY_USER_WIRED) == 0) {
 1662                                 rv = KERN_INVALID_ARGUMENT;
 1663                                 goto done;
 1664                         }
 1665                         KASSERT(entry->wired_count != 0, ("wired count was 0 with USER_WIRED set! %p", entry));
 1666                         entry = entry->next;
 1667                 }
 1668 
 1669                 /*
 1670                  * Now decrement the wiring count for each region. If a region
 1671                  * becomes completely unwired, unwire its physical pages and
 1672                  * mappings.
 1673                  */
 1674                 entry = start_entry;
 1675                 while ((entry != &map->header) && (entry->start < end)) {
 1676                         KASSERT(entry->eflags & MAP_ENTRY_USER_WIRED, ("expected USER_WIRED on entry %p", entry));
 1677                         entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 1678                         entry->wired_count--;
 1679                         if (entry->wired_count == 0)
 1680                                 vm_fault_unwire(map, entry->start, entry->end,
 1681                                     entry->object.vm_object != NULL &&
 1682                                     entry->object.vm_object->type == OBJT_DEVICE);
 1683                         entry = entry->next;
 1684                 }
 1685         }
 1686 done:
 1687         vm_map_unclip_range(map, start_entry, start, real_end, 
 1688                 MAP_CLIP_NO_HOLES);
 1689         map->timestamp++;
 1690         vm_map_unlock(map);
 1691         return (rv);
 1692 }
 1693 
 1694 /*
 1695  *      vm_map_pageable:
 1696  *
 1697  *      Sets the pageability of the specified address
 1698  *      range in the target map.  Regions specified
 1699  *      as not pageable require locked-down physical
 1700  *      memory and physical page maps.
 1701  *
 1702  *      The map must not be locked, but a reference
 1703  *      must remain to the map throughout the call.
 1704  */
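      /*
       * Illustrative sketch (assumed caller): helpers such as vslock() are
       * expected to wire a user range for kernel access by passing
       * new_pageable == FALSE, and to unwire it later with TRUE, e.g.:
       *
       *      rv = vm_map_pageable(&p->p_vmspace->vm_map,
       *          trunc_page(addr), round_page(addr + size), FALSE);
       */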
 1705 int
 1706 vm_map_pageable(map, start, real_end, new_pageable)
 1707         vm_map_t map;
 1708         vm_offset_t start;
 1709         vm_offset_t real_end;
 1710         boolean_t new_pageable;
 1711 {
 1712         vm_map_entry_t entry;
 1713         vm_map_entry_t start_entry;
 1714         vm_offset_t end;
 1715         boolean_t fictitious;
 1716         int rv = KERN_SUCCESS;
 1717         int s;
 1718 
 1719         vm_map_lock(map);
 1720         VM_MAP_RANGE_CHECK(map, start, real_end);
 1721         end = real_end;
 1722 
 1723         start_entry = vm_map_clip_range(map, start, end, MAP_CLIP_NO_HOLES);
 1724         if (start_entry == NULL) {
 1725                 vm_map_unlock(map);
 1726                 return (KERN_INVALID_ADDRESS);
 1727         }
 1728         if (new_pageable == 0) {
 1729                 /*
 1730                  * Wiring.  
 1731                  *
 1732                  * 1.  Holding the write lock, we create any shadow or zero-fill
 1733                  * objects that need to be created. Then we clip each map
 1734                  * entry to the region to be wired and increment its wiring
 1735                  * count.  We create objects before clipping the map entries
 1736                  * to avoid object proliferation.
 1737                  *
 1738                  * 2.  We downgrade to a read lock, and call vm_fault_wire to
 1739                  * fault in the pages for any newly wired area (wired_count is
 1740                  * 1).
 1741                  *
 1742                  * Downgrading to a read lock for vm_fault_wire avoids a 
 1743                  * possible deadlock with another process that may have faulted
 1744                  * on one of the pages to be wired (it would mark the page busy,
 1745                  * blocking us, then in turn block on the map lock that we
 1746                  * hold).  Because of problems in the recursive lock package,
 1747                  * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
 1748                  * any actions that require the write lock must be done
 1749                  * beforehand.  Because we keep the read lock on the map, the
 1750                  * copy-on-write status of the entries we modify here cannot
 1751                  * change.
 1752                  */
 1753 
 1754                 entry = start_entry;
 1755                 while ((entry != &map->header) && (entry->start < end)) {
 1756                         /*
 1757                          * Trivial case if the entry is already wired
 1758                          */
 1759                         if (entry->wired_count) {
 1760                                 entry->wired_count++;
 1761                                 entry = entry->next;
 1762                                 continue;
 1763                         }
 1764 
 1765                         /*
 1766                          * The entry is being newly wired, we have to setup
 1767                          * appropriate management structures.  A shadow 
 1768                          * object is required for a copy-on-write region,
 1769                          * or a normal object for a zero-fill region.  We
 1770                          * do not have to do this for entries that point to sub
 1771                          * maps because we won't hold the lock on the sub map.
 1772                          */
 1773                         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 1774                                 int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
 1775                                 if (copyflag &&
 1776                                     ((entry->protection & VM_PROT_WRITE) != 0)) {
 1777 
 1778                                         vm_object_shadow(&entry->object.vm_object,
 1779                                             &entry->offset,
 1780                                             atop(entry->end - entry->start));
 1781                                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 1782                                 } else if (entry->object.vm_object == NULL &&
 1783                                            !map->system_map) {
 1784                                         entry->object.vm_object =
 1785                                             vm_object_allocate(OBJT_DEFAULT,
 1786                                                 atop(entry->end - entry->start));
 1787                                         entry->offset = (vm_offset_t) 0;
 1788                                 }
 1789                         }
 1790 
 1791                         entry->wired_count++;
 1792                         entry = entry->next;
 1793                 }
 1794 
 1795                 /*
 1796                  * Pass 2.
 1797                  */
 1798 
 1799                 /*
 1800                  * HACK HACK HACK HACK
 1801                  *
 1802                  * Unlock the map to avoid deadlocks.  The in-transit flag
 1803                  * protects us from most changes but note that
 1804                  * clipping may still occur.  To prevent clipping from
 1805                  * occurring after the unlock, except for when we are
 1806                  * blocking in vm_fault_wire, we must run at splvm().
 1807                  * Otherwise our accesses to entry->start and entry->end
 1808                  * could be corrupted.  We have to set splvm() prior to
 1809                  * unlocking so start_entry does not change out from
 1810                  * under us at the very beginning of the loop.
 1811                  *
 1812                  * HACK HACK HACK HACK
 1813                  */
 1814 
 1815                 s = splvm();
 1816                 vm_map_unlock(map);
 1817 
 1818                 entry = start_entry;
 1819                 while (entry != &map->header && entry->start < end) {
 1820                         /*
 1821                          * If vm_fault_wire fails for any page we need to undo
 1822                          * what has been done.  We decrement the wiring count
 1823                          * for those pages which have not yet been wired (now)
 1824                          * and unwire those that have (later).
 1825                          */
 1826                         vm_offset_t save_start = entry->start;
 1827                         vm_offset_t save_end = entry->end;
 1828 
 1829                         fictitious = entry->object.vm_object != NULL &&
 1830                             entry->object.vm_object->type == OBJT_DEVICE;
 1831                         if (entry->wired_count == 1)
 1832                                 rv = vm_fault_wire(map, entry->start, entry->end, 0, fictitious);
 1833                         if (rv) {
 1834                                 CLIP_CHECK_BACK(entry, save_start);
 1835                                 for (;;) {
 1836                                         KASSERT(entry->wired_count == 1, ("wired_count changed unexpectedly"));
 1837                                         entry->wired_count = 0;
 1838                                         if (entry->end == save_end)
 1839                                                 break;
 1840                                         entry = entry->next;
 1841                                         KASSERT(entry != &map->header, ("bad entry clip during backout"));
 1842                                 }
 1843                                 end = save_start;
 1844                                 break;
 1845                         }
 1846                         CLIP_CHECK_FWD(entry, save_end);
 1847                         entry = entry->next;
 1848                 }
 1849                 splx(s);
 1850 
 1851                 /*
 1852                  * relock.  start_entry is still IN_TRANSITION and must
 1853                  * still exist, but may have been clipped (handled just
 1854                  * below).
 1855                  */
 1856                 vm_map_lock(map);
 1857 
 1858                 /*
 1859                  * If a failure occurred, undo everything by falling through
 1860                  * to the unwiring code.  'end' has already been adjusted
 1861                  * appropriately.
 1862                  */
 1863                 if (rv)
 1864                         new_pageable = 1;
 1865 
 1866                 /*
 1867                  * start_entry might have been clipped if we unlocked the
 1868                  * map and blocked.  No matter how clipped it has gotten
 1869                  * there should be a fragment that is on our start boundary.
 1870                  */
 1871                 CLIP_CHECK_BACK(start_entry, start);
 1872         }
 1873 
 1874         if (new_pageable) {
 1875                 /*
 1876                  * This is the unwiring case.  We must first ensure that the
 1877                  * range to be unwired is really wired down.  We know there
 1878                  * are no holes.
 1879                  */
 1880                 entry = start_entry;
 1881                 while ((entry != &map->header) && (entry->start < end)) {
 1882                         if (entry->wired_count == 0) {
 1883                                 rv = KERN_INVALID_ARGUMENT;
 1884                                 goto done;
 1885                         }
 1886                         entry = entry->next;
 1887                 }
 1888 
 1889                 /*
 1890                  * Now decrement the wiring count for each region. If a region
 1891                  * becomes completely unwired, unwire its physical pages and
 1892                  * mappings.
 1893                  */
 1894                 entry = start_entry;
 1895                 while ((entry != &map->header) && (entry->start < end)) {
 1896                         entry->wired_count--;
 1897                         if (entry->wired_count == 0)
 1898                                 vm_fault_unwire(map, entry->start, entry->end,
 1899                                     entry->object.vm_object != NULL &&
 1900                                     entry->object.vm_object->type == OBJT_DEVICE);
 1901                         entry = entry->next;
 1902                 }
 1903         }
 1904 done:
 1905         vm_map_unclip_range(map, start_entry, start, real_end, 
 1906                 MAP_CLIP_NO_HOLES);
 1907         map->timestamp++;
 1908         vm_map_unlock(map);
 1909         return (rv);
 1910 }
 1911 
 1912 /*
 1913  * vm_map_clean
 1914  *
 1915  * Push any dirty cached pages in the address range to their pager.
 1916  * If syncio is TRUE, dirty pages are written synchronously.
 1917  * If invalidate is TRUE, any cached pages are freed as well.
 1918  *
 1919  * Returns an error if any part of the specified range is not mapped.
 1920  */
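      /*
       * Illustrative sketch (assumed caller): the msync() system call handler
       * typically derives syncio and invalidate from its flags, e.g.:
       *
       *      rv = vm_map_clean(&p->p_vmspace->vm_map, start, end,
       *          (flags & MS_ASYNC) == 0, (flags & MS_INVALIDATE) != 0);
       */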
 1921 int
 1922 vm_map_clean(map, start, end, syncio, invalidate)
 1923         vm_map_t map;
 1924         vm_offset_t start;
 1925         vm_offset_t end;
 1926         boolean_t syncio;
 1927         boolean_t invalidate;
 1928 {
 1929         vm_map_entry_t current;
 1930         vm_map_entry_t entry;
 1931         vm_size_t size;
 1932         vm_object_t object;
 1933         vm_ooffset_t offset;
 1934 
 1935         vm_map_lock_read(map);
 1936         VM_MAP_RANGE_CHECK(map, start, end);
 1937         if (!vm_map_lookup_entry(map, start, &entry)) {
 1938                 vm_map_unlock_read(map);
 1939                 return (KERN_INVALID_ADDRESS);
 1940         }
 1941         /*
 1942          * Make a first pass to check for holes.
 1943          */
 1944         for (current = entry; current->start < end; current = current->next) {
 1945                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
 1946                         vm_map_unlock_read(map);
 1947                         return (KERN_INVALID_ARGUMENT);
 1948                 }
 1949                 if (end > current->end &&
 1950                     (current->next == &map->header ||
 1951                         current->end != current->next->start)) {
 1952                         vm_map_unlock_read(map);
 1953                         return (KERN_INVALID_ADDRESS);
 1954                 }
 1955         }
 1956 
 1957         if (invalidate)
 1958                 pmap_remove(vm_map_pmap(map), start, end);
 1959         /*
 1960          * Make a second pass, cleaning/uncaching pages from the indicated
 1961          * objects as we go.
 1962          */
 1963         for (current = entry; current->start < end; current = current->next) {
 1964                 offset = current->offset + (start - current->start);
 1965                 size = (end <= current->end ? end : current->end) - start;
 1966                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
 1967                         vm_map_t smap;
 1968                         vm_map_entry_t tentry;
 1969                         vm_size_t tsize;
 1970 
 1971                         smap = current->object.sub_map;
 1972                         vm_map_lock_read(smap);
 1973                         (void) vm_map_lookup_entry(smap, offset, &tentry);
 1974                         tsize = tentry->end - offset;
 1975                         if (tsize < size)
 1976                                 size = tsize;
 1977                         object = tentry->object.vm_object;
 1978                         offset = tentry->offset + (offset - tentry->start);
 1979                         vm_map_unlock_read(smap);
 1980                 } else {
 1981                         object = current->object.vm_object;
 1982                 }
 1983                 /*
 1984                  * Note that there is absolutely no sense in writing out
 1985                  * anonymous objects, so we track down the vnode object
 1986                  * to write out.
 1987                  * We invalidate (remove) all pages from the address space
 1988                  * anyway, for semantic correctness.
 1989                  *
 1990                  * note: certain anonymous maps, such as MAP_NOSYNC maps,
 1991                  * may start out with a NULL object.
 1992                  */
 1993                 while (object && object->backing_object) {
 1994                         offset += object->backing_object_offset;
 1995                         object = object->backing_object;
 1996                         if (object->size < OFF_TO_IDX( offset + size))
 1997                                 size = IDX_TO_OFF(object->size) - offset;
 1998                 }
 1999                 if (object && (object->type == OBJT_VNODE) && 
 2000                     (current->protection & VM_PROT_WRITE)) {
 2001                         /*
 2002                          * Flush pages if writing is allowed, invalidate them
 2003                          * if invalidation requested.  Pages undergoing I/O
 2004                          * will be ignored by vm_object_page_remove().
 2005                          *
 2006                          * We cannot lock the vnode and then wait for paging
 2007                          * to complete without deadlocking against vm_fault.
 2008                          * Instead we simply call vm_object_page_remove() and
 2009                          * allow it to block internally on a page-by-page 
 2010                          * basis when it encounters pages undergoing async 
 2011                          * I/O.
 2012                          */
 2013                         int flags;
 2014 
 2015                         vm_object_reference(object);
 2016                         vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
 2017                         flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
 2018                         flags |= invalidate ? OBJPC_INVAL : 0;
 2019                         vm_object_page_clean(object,
 2020                             OFF_TO_IDX(offset),
 2021                             OFF_TO_IDX(offset + size + PAGE_MASK),
 2022                             flags);
 2023                         VOP_UNLOCK(object->handle, 0, curproc);
 2024                         vm_object_deallocate(object);
 2025                 }
 2026                 if (object && invalidate &&
 2027                    ((object->type == OBJT_VNODE) ||
 2028                     (object->type == OBJT_DEVICE))) {
 2029                         vm_object_reference(object);
 2030                         vm_object_page_remove(object,
 2031                             OFF_TO_IDX(offset),
 2032                             OFF_TO_IDX(offset + size + PAGE_MASK),
 2033                             old_msync ? FALSE : TRUE);
 2034                         vm_object_deallocate(object);
 2035                 }
 2036                 start += size;
 2037         }
 2038 
 2039         vm_map_unlock_read(map);
 2040         return (KERN_SUCCESS);
 2041 }
 2042 
 2043 /*
 2044  *      vm_map_entry_unwire:    [ internal use only ]
 2045  *
 2046  *      Make the region specified by this entry pageable.
 2047  *
 2048  *      The map in question should be locked.
 2049  *      [This is the reason for this routine's existence.]
 2050  */
 2051 static void 
 2052 vm_map_entry_unwire(map, entry)
 2053         vm_map_t map;
 2054         vm_map_entry_t entry;
 2055 {
 2056         vm_fault_unwire(map, entry->start, entry->end,
 2057             entry->object.vm_object != NULL &&
 2058             entry->object.vm_object->type == OBJT_DEVICE);
 2059         entry->wired_count = 0;
 2060 }
 2061 
 2062 /*
 2063  *      vm_map_entry_delete:    [ internal use only ]
 2064  *
 2065  *      Deallocate the given entry from the target map.
 2066  */
 2067 static void
 2068 vm_map_entry_delete(map, entry)
 2069         vm_map_t map;
 2070         vm_map_entry_t entry;
 2071 {
 2072         vm_map_entry_unlink(map, entry);
 2073         map->size -= entry->end - entry->start;
 2074 
 2075         if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) {
 2076                 vm_object_deallocate(entry->object.vm_object);
 2077         }
 2078 
 2079         vm_map_entry_dispose(map, entry);
 2080 }
 2081 
 2082 /*
 2083  *      vm_map_delete:  [ internal use only ]
 2084  *
 2085  *      Deallocates the given address range from the target
 2086  *      map.
 2087  */
 2088 int
 2089 vm_map_delete(map, start, end)
 2090         vm_map_t map;
 2091         vm_offset_t start;
 2092         vm_offset_t end;
 2093 {
 2094         vm_object_t object;
 2095         vm_map_entry_t entry;
 2096         vm_map_entry_t first_entry;
 2097 
 2098         /*
 2099          * Find the start of the region, and clip it
 2100          */
 2101 
 2102 again:
 2103         if (!vm_map_lookup_entry(map, start, &first_entry))
 2104                 entry = first_entry->next;
 2105         else {
 2106                 entry = first_entry;
 2107                 vm_map_clip_start(map, entry, start);
 2108                 /*
 2109                  * Fix the lookup hint now, rather than each time through the
 2110                  * loop.
 2111                  */
 2112                 SAVE_HINT(map, entry->prev);
 2113         }
 2114 
 2115         /*
 2116          * Save the free space hint
 2117          */
 2118 
 2119         if (entry == &map->header) {
 2120                 map->first_free = &map->header;
 2121         } else if (map->first_free->start >= start) {
 2122                 map->first_free = entry->prev;
 2123         }
 2124 
 2125         /*
 2126          * Step through all entries in this region
 2127          */
 2128 
 2129         while ((entry != &map->header) && (entry->start < end)) {
 2130                 vm_map_entry_t next;
 2131                 vm_offset_t s, e;
 2132                 vm_pindex_t offidxstart, offidxend, count;
 2133 
 2134                 /*
 2135                  * If we hit an in-transition entry we have to sleep and
 2136                  * retry.  It's easier (and not really slower) to just retry
 2137                  * since this case occurs so rarely and the hint is already
 2138                  * pointing at the right place.  We have to reset the
 2139                  * start offset so as not to accidentally delete an entry
 2140                  * another process just created in vacated space.
 2141                  */
 2142                 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) {
 2143                         entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP;
 2144                         start = entry->start;
 2145                         ++cnt.v_intrans_coll;
 2146                         ++cnt.v_intrans_wait;
 2147                         vm_map_transition_wait(map);
 2148                         goto again;
 2149                 }
 2150                 vm_map_clip_end(map, entry, end);
 2151 
 2152                 s = entry->start;
 2153                 e = entry->end;
 2154                 next = entry->next;
 2155 
 2156                 offidxstart = OFF_TO_IDX(entry->offset);
 2157                 count = OFF_TO_IDX(e - s);
 2158                 object = entry->object.vm_object;
 2159 
 2160                 /*
 2161                  * Unwire before removing addresses from the pmap; otherwise,
 2162                  * unwiring will put the entries back in the pmap.
 2163                  */
 2164                 if (entry->wired_count != 0) {
 2165                         vm_map_entry_unwire(map, entry);
 2166                 }
 2167 
 2168                 offidxend = offidxstart + count;
 2169 
 2170                 if ((object == kernel_object) || (object == kmem_object)) {
 2171                         vm_object_page_remove(object, offidxstart, offidxend, FALSE);
 2172                 } else {
 2173                         pmap_remove(map->pmap, s, e);
 2174                         if (object != NULL &&
 2175                             object->ref_count != 1 &&
 2176                             (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
 2177                             (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) {
 2178                                 vm_object_collapse(object);
 2179                                 vm_object_page_remove(object, offidxstart, offidxend, FALSE);
 2180                                 if (object->type == OBJT_SWAP) {
 2181                                         swap_pager_freespace(object, offidxstart, count);
 2182                                 }
 2183                                 if (offidxend >= object->size &&
 2184                                     offidxstart < object->size) {
 2185                                         object->size = offidxstart;
 2186                                 }
 2187                         }
 2188                 }
 2189 
 2190                 /*
 2191                  * Delete the entry (which may delete the object) only after
 2192                  * removing all pmap entries pointing to its pages.
 2193                  * (Otherwise, its page frames may be reallocated, and any
 2194                  * modify bits will be set in the wrong object!)
 2195                  */
 2196                 vm_map_entry_delete(map, entry);
 2197                 entry = next;
 2198         }
 2199         return (KERN_SUCCESS);
 2200 }
 2201 
 2202 /*
 2203  *      vm_map_remove:
 2204  *
 2205  *      Remove the given address range from the target map.
 2206  *      This is the exported form of vm_map_delete.
 2207  */
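      /*
       * Illustrative sketch (assumed caller): allocators such as kmem_free()
       * are expected to release a mapping by removing its page-aligned range:
       *
       *      (void) vm_map_remove(map, trunc_page(addr),
       *          round_page(addr + size));
       */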
 2208 int
 2209 vm_map_remove(map, start, end)
 2210         vm_map_t map;
 2211         vm_offset_t start;
 2212         vm_offset_t end;
 2213 {
 2214         int result, s = 0;
 2215 
 2216         if (map == kmem_map || map == mb_map)
 2217                 s = splvm();
 2218 
 2219         vm_map_lock(map);
 2220         VM_MAP_RANGE_CHECK(map, start, end);
 2221         result = vm_map_delete(map, start, end);
 2222         vm_map_unlock(map);
 2223 
 2224         if (map == kmem_map || map == mb_map)
 2225                 splx(s);
 2226 
 2227         return (result);
 2228 }
 2229 
 2230 /*
 2231  *      vm_map_check_protection:
 2232  *
 2233  *      Assert that the target map allows the specified
 2234  *      privilege on the entire address region given.
 2235  *      The entire region must be allocated.
 2236  */
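      /*
       * Illustrative sketch (assumed caller): access checks such as useracc()
       * can use this to verify an entire user range before touching it, e.g.:
       *
       *      if (!vm_map_check_protection(&curproc->p_vmspace->vm_map,
       *          trunc_page(addr), round_page(addr + len),
       *          VM_PROT_READ | VM_PROT_WRITE))
       *              return (FALSE);
       */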
 2237 boolean_t
 2238 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
 2239                         vm_prot_t protection)
 2240 {
 2241         vm_map_entry_t entry;
 2242         vm_map_entry_t tmp_entry;
 2243 
 2244         if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
 2245                 return (FALSE);
 2246         }
 2247         entry = tmp_entry;
 2248 
 2249         while (start < end) {
 2250                 if (entry == &map->header) {
 2251                         return (FALSE);
 2252                 }
 2253                 /*
 2254                  * No holes allowed!
 2255                  */
 2256 
 2257                 if (start < entry->start) {
 2258                         return (FALSE);
 2259                 }
 2260                 /*
 2261                  * Check protection associated with entry.
 2262                  */
 2263 
 2264                 if ((entry->protection & protection) != protection) {
 2265                         return (FALSE);
 2266                 }
 2267                 /* go to next entry */
 2268 
 2269                 start = entry->end;
 2270                 entry = entry->next;
 2271         }
 2272         return (TRUE);
 2273 }
 2274 
 2275 /*
 2276  * Split the pages in a map entry into a new object.  This affords
 2277  * easier removal of unused pages, and keeps object inheritance from
 2278  * having a negative impact on memory usage.
 2279  */
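      /*
       * Call pattern (taken from vm_map_copy_entry() below): the split is
       * only attempted for anonymous (default/swap) objects that are mapped
       * by a single entry yet still carry extra references:
       *
       *      vm_object_collapse(src_object);
       *      if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) ==
       *          OBJ_ONEMAPPING) {
       *              vm_map_split(src_entry);
       *              src_object = src_entry->object.vm_object;
       *      }
       */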
 2280 static void
 2281 vm_map_split(entry)
 2282         vm_map_entry_t entry;
 2283 {
 2284         vm_page_t m;
 2285         vm_object_t orig_object, new_object, source;
 2286         vm_offset_t s, e;
 2287         vm_pindex_t offidxstart, offidxend, idx;
 2288         vm_size_t size;
 2289         vm_ooffset_t offset;
 2290 
 2291         orig_object = entry->object.vm_object;
 2292         if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
 2293                 return;
 2294         if (orig_object->ref_count <= 1)
 2295                 return;
 2296 
 2297         offset = entry->offset;
 2298         s = entry->start;
 2299         e = entry->end;
 2300 
 2301         offidxstart = OFF_TO_IDX(offset);
 2302         offidxend = offidxstart + OFF_TO_IDX(e - s);
 2303         size = offidxend - offidxstart;
 2304 
 2305         new_object = vm_pager_allocate(orig_object->type,
 2306                 NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL);
 2307         if (new_object == NULL)
 2308                 return;
 2309 
 2310         source = orig_object->backing_object;
 2311         if (source != NULL) {
 2312                 vm_object_reference(source);    /* Referenced by new_object */
 2313                 LIST_INSERT_HEAD(&source->shadow_head,
 2314                                   new_object, shadow_list);
 2315                 vm_object_clear_flag(source, OBJ_ONEMAPPING);
 2316                 new_object->backing_object_offset = 
 2317                         orig_object->backing_object_offset + IDX_TO_OFF(offidxstart);
 2318                 new_object->backing_object = source;
 2319                 source->shadow_count++;
 2320                 source->generation++;
 2321         }
 2322 
 2323         for (idx = 0; idx < size; idx++) {
 2324                 vm_page_t m;
 2325 
 2326         retry:
 2327                 m = vm_page_lookup(orig_object, offidxstart + idx);
 2328                 if (m == NULL)
 2329                         continue;
 2330 
 2331                 /*
 2332                  * We must wait for pending I/O to complete before we can
 2333                  * rename the page.
 2334                  *
 2335                  * We do not have to VM_PROT_NONE the page as mappings should
 2336                  * not be changed by this operation.
 2337                  */
 2338                 if (vm_page_sleep_busy(m, TRUE, "spltwt"))
 2339                         goto retry;
 2340                         
 2341                 vm_page_busy(m);
 2342                 vm_page_rename(m, new_object, idx);
 2343                 /* page automatically made dirty by rename and cache handled */
 2344                 vm_page_busy(m);
 2345         }
 2346 
 2347         if (orig_object->type == OBJT_SWAP) {
 2348                 vm_object_pip_add(orig_object, 1);
 2349                 /*
 2350                  * copy orig_object pages into new_object
 2351                  * and destroy unneeded pages in
 2352                  * shadow object.
 2353                  */
 2354                 swap_pager_copy(orig_object, new_object, offidxstart, 0);
 2355                 vm_object_pip_wakeup(orig_object);
 2356         }
 2357 
 2358         for (idx = 0; idx < size; idx++) {
 2359                 m = vm_page_lookup(new_object, idx);
 2360                 if (m) {
 2361                         vm_page_wakeup(m);
 2362                 }
 2363         }
 2364 
 2365         entry->object.vm_object = new_object;
 2366         entry->offset = 0LL;
 2367         vm_object_deallocate(orig_object);
 2368 }
 2369 
 2370 /*
 2371  *      vm_map_copy_entry:
 2372  *
 2373  *      Copies the contents of the source entry to the destination
 2374  *      entry.  The entries *must* be aligned properly.
 2375  */
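      /*
       * Call pattern (taken from vmspace_fork() below): for VM_INHERIT_COPY
       * entries the child entry is linked in with a NULL object and then
       * filled in here:
       *
       *      new_entry->object.vm_object = NULL;
       *      vm_map_entry_link(new_map, new_map->header.prev, new_entry);
       *      vm_map_copy_entry(old_map, new_map, old_entry, new_entry);
       */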
 2376 static void
 2377 vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
 2378         vm_map_t src_map, dst_map;
 2379         vm_map_entry_t src_entry, dst_entry;
 2380 {
 2381         vm_object_t src_object;
 2382 
 2383         if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP)
 2384                 return;
 2385 
 2386         if (src_entry->wired_count == 0) {
 2387 
 2388                 /*
 2389                  * If the source entry is marked needs_copy, it is already
 2390                  * write-protected.
 2391                  */
 2392                 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
 2393                         pmap_protect(src_map->pmap,
 2394                             src_entry->start,
 2395                             src_entry->end,
 2396                             src_entry->protection & ~VM_PROT_WRITE);
 2397                 }
 2398 
 2399                 /*
 2400                  * Make a copy of the object.
 2401                  */
 2402                 if ((src_object = src_entry->object.vm_object) != NULL) {
 2403 
 2404                         if ((src_object->handle == NULL) &&
 2405                                 (src_object->type == OBJT_DEFAULT ||
 2406                                  src_object->type == OBJT_SWAP)) {
 2407                                 vm_object_collapse(src_object);
 2408                                 if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
 2409                                         vm_map_split(src_entry);
 2410                                         src_object = src_entry->object.vm_object;
 2411                                 }
 2412                         }
 2413 
 2414                         vm_object_reference(src_object);
 2415                         vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
 2416                         dst_entry->object.vm_object = src_object;
 2417                         src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
 2418                         dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
 2419                         dst_entry->offset = src_entry->offset;
 2420                 } else {
 2421                         dst_entry->object.vm_object = NULL;
 2422                         dst_entry->offset = 0;
 2423                 }
 2424 
 2425                 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
 2426                     dst_entry->end - dst_entry->start, src_entry->start);
 2427         } else {
 2428                 /*
 2429                  * Of course, wired down pages can't be set copy-on-write.
 2430                  * Cause wired pages to be copied into the new map by
 2431                  * simulating faults (the new pages are pageable)
 2432                  */
 2433                 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
 2434         }
 2435 }
 2436 
 2437 /*
 2438  * vmspace_fork:
 2439  * Create a new process vmspace structure and vm_map
 2440  * based on those of an existing process.  The new map
 2441  * is based on the old map, according to the inheritance
 2442  * values on the regions in that map.
 2443  *
 2444  * The source map must not be locked.
 2445  */
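      /*
       * Illustrative sketch (assumed caller): the fork path gives the child
       * its own copy-on-write address space roughly as:
       *
       *      p2->p_vmspace = vmspace_fork(p1->p_vmspace);
       */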
 2446 struct vmspace *
 2447 vmspace_fork(vm1)
 2448         struct vmspace *vm1;
 2449 {
 2450         struct vmspace *vm2;
 2451         vm_map_t old_map = &vm1->vm_map;
 2452         vm_map_t new_map;
 2453         vm_map_entry_t old_entry;
 2454         vm_map_entry_t new_entry;
 2455         vm_object_t object;
 2456 
 2457         vm_map_lock(old_map);
 2458         old_map->infork = 1;
 2459 
 2460         vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
 2461         bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
 2462             (caddr_t) &vm1->vm_endcopy - (caddr_t) &vm1->vm_startcopy);
 2463         new_map = &vm2->vm_map; /* XXX */
 2464         new_map->timestamp = 1;
 2465 
 2466         old_entry = old_map->header.next;
 2467 
 2468         while (old_entry != &old_map->header) {
 2469                 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 2470                         panic("vm_map_fork: encountered a submap");
 2471 
 2472                 switch (old_entry->inheritance) {
 2473                 case VM_INHERIT_NONE:
 2474                         break;
 2475 
 2476                 case VM_INHERIT_SHARE:
 2477                         /*
 2478                          * Clone the entry, creating the shared object if necessary.
 2479                          */
 2480                         object = old_entry->object.vm_object;
 2481                         if (object == NULL) {
 2482                                 object = vm_object_allocate(OBJT_DEFAULT,
 2483                                         atop(old_entry->end - old_entry->start));
 2484                                 old_entry->object.vm_object = object;
 2485                                 old_entry->offset = (vm_offset_t) 0;
 2486                         }
 2487 
 2488                         /*
 2489                          * Add the reference before calling vm_object_shadow
 2490                          * to ensure that a shadow object is created.
 2491                          */
 2492                         vm_object_reference(object);
 2493                         if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
 2494                                 vm_object_shadow(&old_entry->object.vm_object,
 2495                                         &old_entry->offset,
 2496                                         atop(old_entry->end - old_entry->start));
 2497                                 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 2498                                 /* Transfer the second reference too. */
 2499                                 vm_object_reference(
 2500                                     old_entry->object.vm_object);
 2501                                 vm_object_deallocate(object);
 2502                                 object = old_entry->object.vm_object;
 2503                         }
 2504                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
 2505 
 2506                         /*
 2507                          * Clone the entry, referencing the shared object.
 2508                          */
 2509                         new_entry = vm_map_entry_create(new_map);
 2510                         *new_entry = *old_entry;
 2511                         new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 2512                         new_entry->wired_count = 0;
 2513 
 2514                         /*
 2515                          * Insert the entry into the new map -- we know we're
 2516                          * inserting at the end of the new map.
 2517                          */
 2518 
 2519                         vm_map_entry_link(new_map, new_map->header.prev,
 2520                             new_entry);
 2521 
 2522                         /*
 2523                          * Update the physical map
 2524                          */
 2525 
 2526                         pmap_copy(new_map->pmap, old_map->pmap,
 2527                             new_entry->start,
 2528                             (old_entry->end - old_entry->start),
 2529                             old_entry->start);
 2530                         break;
 2531 
 2532                 case VM_INHERIT_COPY:
 2533                         /*
 2534                          * Clone the entry and link into the map.
 2535                          */
 2536                         new_entry = vm_map_entry_create(new_map);
 2537                         *new_entry = *old_entry;
 2538                         new_entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 2539                         new_entry->wired_count = 0;
 2540                         new_entry->object.vm_object = NULL;
 2541                         vm_map_entry_link(new_map, new_map->header.prev,
 2542                             new_entry);
 2543                         vm_map_copy_entry(old_map, new_map, old_entry,
 2544                             new_entry);
 2545                         break;
 2546                 }
 2547                 old_entry = old_entry->next;
 2548         }
 2549 
 2550         new_map->size = old_map->size;
 2551         old_map->infork = 0;
 2552         vm_map_unlock(old_map);
 2553 
 2554         return (vm2);
 2555 }
 2556 
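      /*
       * vm_map_stack:
       *
       *      Reserve max_ssize bytes of address space for a grow-down stack
       *      ending at addrbos + max_ssize, initially backing only the top
       *      init_ssize bytes of the range.
       *
       * Illustrative sketch (assumed caller): exec is expected to set up the
       * main user stack roughly as:
       *
       *      rv = vm_map_stack(&vmspace->vm_map, USRSTACK - maxssiz,
       *          maxssiz, VM_PROT_ALL, VM_PROT_ALL, 0);
       *
       * USRSTACK and maxssiz above are assumed from the MD/exec layer.
       */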
 2557 int
 2558 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
 2559               vm_prot_t prot, vm_prot_t max, int cow)
 2560 {
 2561         vm_map_entry_t prev_entry;
 2562         vm_map_entry_t new_stack_entry;
 2563         vm_size_t      init_ssize;
 2564         int            rv;
 2565 
 2566         if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS)
 2567                 return (KERN_NO_SPACE);
 2568         if (addrbos > map->max_offset)
 2569                 return (KERN_NO_SPACE);
 2570         if (max_ssize < sgrowsiz)
 2571                 init_ssize = max_ssize;
 2572         else
 2573                 init_ssize = sgrowsiz;
 2574 
 2575         vm_map_lock(map);
 2576 
 2577         /* If addr is already mapped, no go */
 2578         if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
 2579                 vm_map_unlock(map);
 2580                 return (KERN_NO_SPACE);
 2581         }
 2582 
 2583         /* If we would blow our VMEM resource limit, no go */
 2584         if (map->size + init_ssize >
 2585             curproc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
 2586                 vm_map_unlock(map);
 2587                 return (KERN_NO_SPACE);
 2588         }
 2589 
 2590         /* If we can't accommodate max_ssize in the current mapping,
 2591          * no go.  However, we need to be aware that subsequent user
 2592          * mappings might map into the space we have reserved for
 2593          * the stack, and currently this space is not protected.
 2594          * 
 2595          * Hopefully we will at least detect this condition 
 2596          * when we try to grow the stack.
 2597          */
 2598         if ((prev_entry->next != &map->header) &&
 2599             (prev_entry->next->start < addrbos + max_ssize)) {
 2600                 vm_map_unlock(map);
 2601                 return (KERN_NO_SPACE);
 2602         }
 2603 
 2604         /* We initially map a stack of only init_ssize.  We will
 2605          * grow as needed later.  Since this is to be a grow 
 2606          * down stack, we map at the top of the range.
 2607          *
 2608          * Note: we would normally expect prot and max to be
 2609          * VM_PROT_ALL, and cow to be 0.  Possibly we should
 2610          * eliminate these as input parameters, and just
 2611          * pass these values here in the insert call.
 2612          */
 2613         rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize,
 2614                            addrbos + max_ssize, prot, max, cow);
 2615 
 2616         /* Now set the avail_ssize amount */
 2617         if (rv == KERN_SUCCESS){
 2618                 if (prev_entry != &map->header)
 2619                         vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize);
 2620                 new_stack_entry = prev_entry->next;
 2621                 if (new_stack_entry->end   != addrbos + max_ssize ||
 2622                     new_stack_entry->start != addrbos + max_ssize - init_ssize)
 2623                         panic ("Bad entry start/end for new stack entry");
 2624                 else 
 2625                         new_stack_entry->avail_ssize = max_ssize - init_ssize;
 2626         }
 2627 
 2628         vm_map_unlock(map);
 2629         return (rv);
 2630 }
 2631 
 2632 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
 2633  * desired address is already mapped, or if we successfully grow
 2634  * the stack.  Also returns KERN_SUCCESS if addr is outside the
 2635  * stack range (this is strange, but preserves compatibility with
 2636  * the grow function in vm_machdep.c).
 2637  */
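      /*
       * Illustrative sketch (assumed caller): the page-fault path tries to
       * grow the stack before resolving a fault on a user address, e.g.:
       *
       *      if (vm_map_growstack(p, va) != KERN_SUCCESS)
       *              return (KERN_FAILURE);
       */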
 2638 int
 2639 vm_map_growstack (struct proc *p, vm_offset_t addr)
 2640 {
 2641         vm_map_entry_t prev_entry;
 2642         vm_map_entry_t stack_entry;
 2643         vm_map_entry_t new_stack_entry;
 2644         struct vmspace *vm = p->p_vmspace;
 2645         vm_map_t map = &vm->vm_map;
 2646         vm_offset_t    end;
 2647         int      grow_amount;
 2648         int      rv = KERN_SUCCESS;
 2649         int      is_procstack;
 2650         int      use_read_lock = 1;
 2651 
 2652 Retry:
 2653         if (use_read_lock)
 2654                 vm_map_lock_read(map);
 2655         else
 2656                 vm_map_lock(map);
 2657 
 2658         /* If addr is already in the entry range, no need to grow.*/
 2659         if (vm_map_lookup_entry(map, addr, &prev_entry))
 2660                 goto done;
 2661 
 2662         if ((stack_entry = prev_entry->next) == &map->header)
 2663                 goto done;
 2664         if (prev_entry == &map->header) 
 2665                 end = stack_entry->start - stack_entry->avail_ssize;
 2666         else
 2667                 end = prev_entry->end;
 2668 
 2669         /* This next test mimics the old grow function in vm_machdep.c.
 2670          * It really doesn't quite make sense, but we do it anyway
 2671          * for compatibility.
 2672          *
 2673          * If the stack is not growable, return success.  This signals
 2674          * the caller to proceed as it normally would with ordinary VM.
 2675          */
 2676         if (stack_entry->avail_ssize < 1 ||
 2677             addr >= stack_entry->start ||
 2678             addr <  stack_entry->start - stack_entry->avail_ssize) {
 2679                 goto done;
 2680         } 
 2681         
 2682         /* Find the minimum grow amount */
 2683         grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
 2684         if (grow_amount > stack_entry->avail_ssize) {
 2685                 rv = KERN_NO_SPACE;
 2686                 goto done;
 2687         }
 2688 
 2689         /* If there is no longer enough space between the entries,
 2690          * fail, and adjust the available space.  Note: this
 2691          * should only happen if the user has mapped into the
 2692          * stack area after the stack was created, and is
 2693          * probably an error.
 2694          *
 2695          * This also effectively destroys any guard page the user
 2696          * might have intended by limiting the stack size.
 2697          */
 2698         if (grow_amount > stack_entry->start - end) {
 2699                 if (use_read_lock && vm_map_lock_upgrade(map)) {
 2700                         use_read_lock = 0;
 2701                         goto Retry;
 2702                 }
 2703                 use_read_lock = 0;
 2704                 stack_entry->avail_ssize = stack_entry->start - end;
 2705                 rv = KERN_NO_SPACE;
 2706                 goto done;
 2707         }
 2708 
 2709         is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
 2710 
 2711         /* If this is the main process stack, see if we're over the 
 2712          * stack limit.
 2713          */
 2714         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
 2715                              p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
 2716                 rv = KERN_NO_SPACE;
 2717                 goto done;
 2718         }
 2719 
 2720         /* Round the grow amount up to a multiple of sgrowsiz */
 2721         grow_amount = roundup (grow_amount, sgrowsiz);
 2722         if (grow_amount > stack_entry->avail_ssize) {
 2723                 grow_amount = stack_entry->avail_ssize;
 2724         }
 2725         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
 2726                              p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
 2727                 grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
 2728                               ctob(vm->vm_ssize);
 2729         }
 2730 
 2731         /* If we would blow our VMEM resource limit, no go */
 2732         if (map->size + grow_amount >
 2733             curproc->p_rlimit[RLIMIT_VMEM].rlim_cur) {
 2734                 rv = KERN_NO_SPACE;
 2735                 goto done;
 2736         }
 2737 
 2738         if (use_read_lock && vm_map_lock_upgrade(map)) {
 2739                 use_read_lock = 0;
 2740                 goto Retry;
 2741         }
 2742         use_read_lock = 0;
 2743 
 2744         /* Get the preliminary new entry start value */
 2745         addr = stack_entry->start - grow_amount;
 2746 
 2747         /* If this puts us into the previous entry, cut back our growth
 2748          * to the available space.  Also, see the note above.
 2749          */
 2750         if (addr < end) {
 2751                 stack_entry->avail_ssize = stack_entry->start - end;
 2752                 addr = end;
 2753         }
 2754 
 2755         rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
 2756                            VM_PROT_ALL,
 2757                            VM_PROT_ALL,
 2758                            0);
 2759 
 2760         /* Adjust the available stack space by the amount we grew. */
 2761         if (rv == KERN_SUCCESS) {
 2762                 if (prev_entry != &map->header)
 2763                         vm_map_clip_end(map, prev_entry, addr);
 2764                 new_stack_entry = prev_entry->next;
 2765                 if (new_stack_entry->end   != stack_entry->start  ||
 2766                     new_stack_entry->start != addr)
 2767                         panic ("Bad stack grow start/end in new stack entry");
 2768                 else {
 2769                         new_stack_entry->avail_ssize = stack_entry->avail_ssize -
 2770                                                         (new_stack_entry->end -
 2771                                                          new_stack_entry->start);
 2772                         if (is_procstack)
 2773                                 vm->vm_ssize += btoc(new_stack_entry->end -
 2774                                                      new_stack_entry->start);
 2775                 }
 2776         }
 2777 
 2778 done:
 2779         if (use_read_lock)
 2780                 vm_map_unlock_read(map);
 2781         else
 2782                 vm_map_unlock(map);
 2783         return (rv);
 2784 }
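/*
 * A self-contained sketch of the grow-amount computation above: the
 * fault distance is rounded up to a page, then up to the growth
 * quantum (sgrowsiz), and finally clamped by the remaining reservation
 * and the stack resource limit.  The constants below stand in for
 * PAGE_SIZE, sgrowsiz, and the per-process limits; they are
 * illustrative only.
 */
#include <stdio.h>

#define ROUNDUP(x, y)   ((((x) + ((y) - 1)) / (y)) * (y))

int
main(void)
{
        unsigned long page_size = 4096;
        unsigned long sgrowsiz = 128UL * 1024;  /* growth quantum (illustrative) */
        unsigned long avail_ssize = 6UL << 20;  /* room left in the reservation */
        unsigned long stack_rlim = 8UL << 20;   /* RLIMIT_STACK (illustrative) */
        unsigned long cur_ssize = 1UL << 20;    /* current stack size in bytes */
        unsigned long fault_dist = 5000;        /* stack_entry->start - addr */

        unsigned long grow = ROUNDUP(fault_dist, page_size);
        grow = ROUNDUP(grow, sgrowsiz);
        if (grow > avail_ssize)
                grow = avail_ssize;
        if (cur_ssize + grow > stack_rlim)
                grow = stack_rlim - cur_ssize;

        printf("grow_amount = %lu bytes\n", grow);
        return (0);
}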
 2785 
 2786 /*
 2787  * Unshare the specified VM space for exec.  If other processes share
 2788  * it, then create a new one.  The new vmspace starts out empty.
 2789  */
 2790 
 2791 void
 2792 vmspace_exec(struct proc *p) {
 2793         struct vmspace *oldvmspace = p->p_vmspace;
 2794         struct vmspace *newvmspace;
 2795         vm_map_t map = &p->p_vmspace->vm_map;
 2796 
 2797         newvmspace = vmspace_alloc(map->min_offset, map->max_offset);
 2798         bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
 2799             (caddr_t) &newvmspace->vm_endcopy -
 2800             (caddr_t) &newvmspace->vm_startcopy);
 2801         /*
 2802          * This code is written this way for prototyping purposes.  The
 2803          * goal is to avoid running down the vmspace here and instead to
 2804          * let the other processes that are still using the vmspace run
 2805          * it down eventually.  Even though there is little or no chance
 2806          * of blocking here, it is a good idea to keep this form for future mods.
 2807          */
 2808         vmspace_free(oldvmspace);
 2809         p->p_vmspace = newvmspace;
 2810         pmap_pinit2(vmspace_pmap(newvmspace));
 2811         if (p == curproc)
 2812                 pmap_activate(p);
 2813 }
 2814 
 2815 /*
 2816  * Unshare the specified VM space for forcing COW.  This
 2817  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
 2818  */
 2819 
 2820 void
 2821 vmspace_unshare(struct proc *p) {
 2822         struct vmspace *oldvmspace = p->p_vmspace;
 2823         struct vmspace *newvmspace;
 2824 
 2825         if (oldvmspace->vm_refcnt == 1)
 2826                 return;
 2827         newvmspace = vmspace_fork(oldvmspace);
 2828         vmspace_free(oldvmspace);
 2829         p->p_vmspace = newvmspace;
 2830         pmap_pinit2(vmspace_pmap(newvmspace));
 2831         if (p == curproc)
 2832                 pmap_activate(p);
 2833 }
 2834         
 2835 
 2836 /*
 2837  *      vm_map_lookup:
 2838  *
 2839  *      Finds the VM object, offset, and
 2840  *      protection for a given virtual address in the
 2841  *      specified map, assuming a page fault of the
 2842  *      type specified.
 2843  *
 2844  *      Leaves the map in question locked for read; return
 2845  *      values are guaranteed until a vm_map_lookup_done
 2846  *      call is performed.  Note that the map argument
 2847  *      is in/out; the returned map must be used in
 2848  *      the call to vm_map_lookup_done.
 2849  *
 2850  *      A handle (out_entry) is returned for use in
 2851  *      vm_map_lookup_done, to make that fast.
 2852  *
 2853  *      If a lookup is requested with "write protection"
 2854  *      specified, the map may be changed to perform virtual
 2855  *      copying operations, although the data referenced will
 2856  *      remain the same.
 2857  */
 2858 int
 2859 vm_map_lookup(vm_map_t *var_map,                /* IN/OUT */
 2860               vm_offset_t vaddr,
 2861               vm_prot_t fault_typea,
 2862               vm_map_entry_t *out_entry,        /* OUT */
 2863               vm_object_t *object,              /* OUT */
 2864               vm_pindex_t *pindex,              /* OUT */
 2865               vm_prot_t *out_prot,              /* OUT */
 2866               boolean_t *wired)                 /* OUT */
 2867 {
 2868         vm_map_entry_t entry;
 2869         vm_map_t map = *var_map;
 2870         vm_prot_t prot;
 2871         vm_prot_t fault_type = fault_typea;
 2872         int use_read_lock = 1;
 2873         int rv = KERN_SUCCESS;
 2874 
 2875 RetryLookup:
 2876         if (use_read_lock)
 2877                 vm_map_lock_read(map);
 2878         else
 2879                 vm_map_lock(map);
 2880 
 2881         /*
 2882          * If the map has an interesting hint, try it before calling the
 2883          * full-blown lookup routine.
 2884          */
 2885         entry = map->hint;
 2886         *out_entry = entry;
 2887 
 2888         if ((entry == &map->header) ||
 2889             (vaddr < entry->start) || (vaddr >= entry->end)) {
 2890                 vm_map_entry_t tmp_entry;
 2891 
 2892                 /*
 2893                  * Entry was either not a valid hint, or the vaddr was not
 2894                  * contained in the entry, so do a full lookup.
 2895                  */
 2896                 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
 2897                         rv = KERN_INVALID_ADDRESS;
 2898                         goto done;
 2899                 }
 2900 
 2901                 entry = tmp_entry;
 2902                 *out_entry = entry;
 2903         }
 2904         
 2905         /*
 2906          * Handle submaps.
 2907          */
 2908 
 2909         if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
 2910                 vm_map_t old_map = map;
 2911 
 2912                 *var_map = map = entry->object.sub_map;
 2913                 if (use_read_lock)
 2914                         vm_map_unlock_read(old_map);
 2915                 else
 2916                         vm_map_unlock(old_map);
 2917                 use_read_lock = 1;
 2918                 goto RetryLookup;
 2919         }
 2920 
 2921         /*
 2922          * Check whether this task is allowed to have this page.
 2923          * Note the special case for MAP_ENTRY_COW
 2924          * pages with an override.  This is to implement a forced
 2925          * COW for debuggers.
 2926          */
 2927 
 2928         if (fault_type & VM_PROT_OVERRIDE_WRITE)
 2929                 prot = entry->max_protection;
 2930         else
 2931                 prot = entry->protection;
 2932 
 2933         fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
 2934         if ((fault_type & prot) != fault_type) {
 2935                 rv = KERN_PROTECTION_FAILURE;
 2936                 goto done;
 2937         }
 2938 
 2939         if ((entry->eflags & MAP_ENTRY_USER_WIRED) &&
 2940             (entry->eflags & MAP_ENTRY_COW) &&
 2941             (fault_type & VM_PROT_WRITE) &&
 2942             (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
 2943                 rv = KERN_PROTECTION_FAILURE;
 2944                 goto done;
 2945         }
 2946 
 2947         /*
 2948          * If this page is not pageable, we have to get it for all possible
 2949          * accesses.
 2950          */
 2951 
 2952         *wired = (entry->wired_count != 0);
 2953         if (*wired)
 2954                 prot = fault_type = entry->protection;
 2955 
 2956         /*
 2957          * If the entry was copy-on-write, either shadow it now or demote the permissions.
 2958          */
 2959 
 2960         if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
 2961                 /*
 2962                  * If we want to write the page, we may as well handle that
 2963                  * now since we've got the map locked.
 2964                  *
 2965                  * If we don't need to write the page, we just demote the
 2966                  * permissions allowed.
 2967                  */
 2968 
 2969                 if (fault_type & VM_PROT_WRITE) {
 2970                         /*
 2971                          * Make a new object, and place it in the object
 2972                          * chain.  Note that no new references have appeared
 2973                          * -- one just moved from the map to the new
 2974                          * object.
 2975                          */
 2976 
 2977                         if (use_read_lock && vm_map_lock_upgrade(map)) {
 2978                                 use_read_lock = 0;
 2979                                 goto RetryLookup;
 2980                         }
 2981                         use_read_lock = 0;
 2982 
 2983                         vm_object_shadow(
 2984                             &entry->object.vm_object,
 2985                             &entry->offset,
 2986                             atop(entry->end - entry->start));
 2987 
 2988                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 2989                 } else {
 2990                         /*
 2991                          * We're attempting to read a copy-on-write page --
 2992                          * don't allow writes.
 2993                          */
 2994 
 2995                         prot &= ~VM_PROT_WRITE;
 2996                 }
 2997         }
 2998 
 2999         /*
 3000          * Create an object if necessary.
 3001          */
 3002         if (entry->object.vm_object == NULL &&
 3003             !map->system_map) {
 3004                 if (use_read_lock && vm_map_lock_upgrade(map))  {
 3005                         use_read_lock = 0;
 3006                         goto RetryLookup;
 3007                 }
 3008                 use_read_lock = 0;
 3009                 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
 3010                     atop(entry->end - entry->start));
 3011                 entry->offset = 0;
 3012         }
 3013 
 3014         /*
 3015          * Return the object/offset from this entry.  If the entry was
 3016          * copy-on-write or empty, it has been fixed up.
 3017          */
 3018 
 3019         *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset);
 3020         *object = entry->object.vm_object;
 3021 
 3022         /*
 3023          * Return the computed protection.  On success we return with a
 3024          * read lock held on the map.  On failure we return with the
 3025          * map unlocked.
 3026          */
 3027         *out_prot = prot;
 3028 done:
 3029         if (rv == KERN_SUCCESS) {
 3030                 if (use_read_lock == 0)
 3031                         vm_map_lock_downgrade(map);
 3032         } else if (use_read_lock) {
 3033                 vm_map_unlock_read(map);
 3034         } else {
 3035                 vm_map_unlock(map);
 3036         }
 3037         return (rv);
 3038 }
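/*
 * A sketch of the calling convention documented above, assuming kernel
 * context (the surrounding fault-handling logic is omitted).  The map
 * argument is in/out because a submap lookup may replace it, and the
 * same (map, entry) pair must be handed back to vm_map_lookup_done()
 * to drop the read lock.
 */
static int
vm_map_lookup_example(vm_map_t map, vm_offset_t va)
{
        vm_map_entry_t entry;
        vm_object_t object;
        vm_pindex_t pindex;
        vm_prot_t prot;
        boolean_t wired;
        int rv;

        rv = vm_map_lookup(&map, va, VM_PROT_READ, &entry, &object,
                           &pindex, &prot, &wired);
        if (rv != KERN_SUCCESS)
                return (rv);

        /* ... use object and pindex while the read lock is held ... */

        vm_map_lookup_done(map, entry);
        return (KERN_SUCCESS);
}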
 3039 
 3040 /*
 3041  *      vm_map_lookup_done:
 3042  *
 3043  *      Releases locks acquired by a vm_map_lookup
 3044  *      (according to the handle returned by that lookup).
 3045  */
 3046 
 3047 void
 3048 vm_map_lookup_done(map, entry)
 3049         vm_map_t map;
 3050         vm_map_entry_t entry;
 3051 {
 3052         /*
 3053          * Unlock the main-level map
 3054          */
 3055 
 3056         vm_map_unlock_read(map);
 3057 }
 3058 
 3059 /*
 3060  * Implement uiomove with VM operations.  This (together with its
 3061  * collateral changes) supports every combination of source object
 3062  * modification and COW-type operation.
 3063  */
 3064 int
 3065 vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages)
 3066         vm_map_t mapa;
 3067         vm_object_t srcobject;
 3068         off_t cp;
 3069         int cnta;
 3070         vm_offset_t uaddra;
 3071         int *npages;
 3072 {
 3073         vm_map_t map;
 3074         vm_object_t first_object, oldobject, object;
 3075         vm_map_entry_t entry;
 3076         vm_prot_t prot;
 3077         boolean_t wired;
 3078         int tcnt, rv;
 3079         vm_offset_t uaddr, start, end, tend;
 3080         vm_pindex_t first_pindex, osize, oindex;
 3081         off_t ooffset;
 3082         int cnt;
 3083 
 3084         if (npages)
 3085                 *npages = 0;
 3086 
 3087         cnt = cnta;
 3088         uaddr = uaddra;
 3089 
 3090         while (cnt > 0) {
 3091                 map = mapa;
 3092 
 3093                 if ((vm_map_lookup(&map, uaddr,
 3094                         VM_PROT_READ, &entry, &first_object,
 3095                         &first_pindex, &prot, &wired)) != KERN_SUCCESS) {
 3096                         return EFAULT;
 3097                 }
 3098 
 3099                 vm_map_clip_start(map, entry, uaddr);
 3100 
 3101                 tcnt = cnt;
 3102                 tend = uaddr + tcnt;
 3103                 if (tend > entry->end) {
 3104                         tcnt = entry->end - uaddr;
 3105                         tend = entry->end;
 3106                 }
 3107 
 3108                 vm_map_clip_end(map, entry, tend);
 3109 
 3110                 start = entry->start;
 3111                 end = entry->end;
 3112 
 3113                 osize = atop(tcnt);
 3114 
 3115                 oindex = OFF_TO_IDX(cp);
 3116                 if (npages) {
 3117                         vm_pindex_t idx;
 3118                         for (idx = 0; idx < osize; idx++) {
 3119                                 vm_page_t m;
 3120                                 if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) {
 3121                                         vm_map_lookup_done(map, entry);
 3122                                         return 0;
 3123                                 }
 3124                                 /*
 3125                                  * Disallow PG_BUSY pages and pages that are not fully
 3126                                  * valid; allow soft-busy (m->busy) pages if fully valid.
 3127                                  */
 3128                                 if ((m->flags & PG_BUSY) ||
 3129                                         ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
 3130                                         vm_map_lookup_done(map, entry);
 3131                                         return 0;
 3132                                 }
 3133                         }
 3134                 }
 3135 
 3136 /*
 3137  * If we are changing an existing map entry, just redirect
 3138  * the object, and change mappings.
 3139  */
 3140                 if ((first_object->type == OBJT_VNODE) &&
 3141                         ((oldobject = entry->object.vm_object) == first_object)) {
 3142 
 3143                         if ((entry->offset != cp) || (oldobject != srcobject)) {
 3144                                 /*
 3145                                  * Remove old window into the file
 3146                                  */
 3147                                 pmap_remove (map->pmap, uaddr, tend);
 3148 
 3149                                 /*
 3150                                  * Force copy-on-write for mmapped regions
 3151                                  */
 3152                                 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
 3153 
 3154                                 /*
 3155                                  * Point the object appropriately
 3156                                  */
 3157                                 if (oldobject != srcobject) {
 3158 
 3159                                 /*
 3160                                  * Set the object optimization hint flag
 3161                                  */
 3162                                         vm_object_set_flag(srcobject, OBJ_OPT);
 3163                                         vm_object_reference(srcobject);
 3164                                         entry->object.vm_object = srcobject;
 3165 
 3166                                         if (oldobject) {
 3167                                                 vm_object_deallocate(oldobject);
 3168                                         }
 3169                                 }
 3170 
 3171                                 entry->offset = cp;
 3172                                 map->timestamp++;
 3173                         } else {
 3174                                 pmap_remove (map->pmap, uaddr, tend);
 3175                         }
 3176 
 3177                 } else if ((first_object->ref_count == 1) &&
 3178                         (first_object->size == osize) &&
 3179                         ((first_object->type == OBJT_DEFAULT) ||
 3180                                 (first_object->type == OBJT_SWAP)) ) {
 3181 
 3182                         oldobject = first_object->backing_object;
 3183 
 3184                         if ((first_object->backing_object_offset != cp) ||
 3185                                 (oldobject != srcobject)) {
 3186                                 /*
 3187                                  * Remove old window into the file
 3188                                  */
 3189                                 pmap_remove (map->pmap, uaddr, tend);
 3190 
 3191                                 /*
 3192                                  * Remove unneeded old pages
 3193                                  */
 3194                                 vm_object_page_remove(first_object, 0, 0, 0);
 3195 
 3196                                 /*
 3197                                  * Invalidate swap space
 3198                                  */
 3199                                 if (first_object->type == OBJT_SWAP) {
 3200                                         swap_pager_freespace(first_object,
 3201                                                 0,
 3202                                                 first_object->size);
 3203                                 }
 3204 
 3205                                 /*
 3206                                  * Force copy-on-write for mmapped regions
 3207                                  */
 3208                                 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
 3209 
 3210                                 /*
 3211                                  * Point the object appropriately
 3212                                  */
 3213                                 if (oldobject != srcobject) {
 3214 
 3215                                 /*
 3216                                  * Set the object optimization hint flag
 3217                                  */
 3218                                         vm_object_set_flag(srcobject, OBJ_OPT);
 3219                                         vm_object_reference(srcobject);
 3220 
 3221                                         if (oldobject) {
 3222                                                 LIST_REMOVE(
 3223                                                         first_object, shadow_list);
 3224                                                 oldobject->shadow_count--;
 3225                                                 /* XXX bump generation? */
 3226                                                 vm_object_deallocate(oldobject);
 3227                                         }
 3228 
 3229                                         LIST_INSERT_HEAD(&srcobject->shadow_head,
 3230                                                 first_object, shadow_list);
 3231                                         srcobject->shadow_count++;
 3232                                         /* XXX bump generation? */
 3233 
 3234                                         first_object->backing_object = srcobject;
 3235                                 }
 3236                                 first_object->backing_object_offset = cp;
 3237                                 map->timestamp++;
 3238                         } else {
 3239                                 pmap_remove (map->pmap, uaddr, tend);
 3240                         }
 3241 /*
 3242  * Otherwise, we have to do a logical mmap.
 3243  */
 3244                 } else {
 3245 
 3246                         vm_object_set_flag(srcobject, OBJ_OPT);
 3247                         vm_object_reference(srcobject);
 3248 
 3249                         pmap_remove (map->pmap, uaddr, tend);
 3250 
 3251                         vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
 3252                         vm_map_lock_upgrade(map);
 3253 
 3254                         if (entry == &map->header) {
 3255                                 map->first_free = &map->header;
 3256                         } else if (map->first_free->start >= start) {
 3257                                 map->first_free = entry->prev;
 3258                         }
 3259 
 3260                         SAVE_HINT(map, entry->prev);
 3261                         vm_map_entry_delete(map, entry);
 3262 
 3263                         object = srcobject;
 3264                         ooffset = cp;
 3265 
 3266                         rv = vm_map_insert(map, object, ooffset, start, tend,
 3267                                 VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE);
 3268 
 3269                         if (rv != KERN_SUCCESS)
 3270                                 panic("vm_uiomove: could not insert new entry: %d", rv);
 3271                 }
 3272 
 3273 /*
 3274  * Map the window directly, if it is already in memory
 3275  */
 3276                 pmap_object_init_pt(map->pmap, uaddr, entry->protection,
 3277                         srcobject, oindex, tcnt, 0);
 3278 
 3279                 map->timestamp++;
 3280                 vm_map_unlock(map);
 3281 
 3282                 cnt -= tcnt;
 3283                 uaddr += tcnt;
 3284                 cp += tcnt;
 3285                 if (npages)
 3286                         *npages += osize;
 3287         }
 3288         return 0;
 3289 }
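/*
 * A self-contained sketch of the chunking performed by the loop above:
 * each pass handles at most the part of the request that fits in the
 * current map entry, then advances the user address and the source
 * offset by the same amount.  The entry boundaries used here are
 * illustrative.
 */
#include <stdio.h>

int
main(void)
{
        unsigned long uaddr = 0x10000;          /* user destination address */
        unsigned long entry_end = 0x12000;      /* end of the current map entry */
        long cnt = 3 * 4096 + 100;              /* bytes requested */
        long cp = 0;                            /* source object offset */

        while (cnt > 0) {
                long tcnt = cnt;

                if (uaddr + tcnt > entry_end)
                        tcnt = entry_end - uaddr;
                printf("chunk: uaddr=%#lx cp=%ld len=%ld\n", uaddr, cp, tcnt);
                cnt -= tcnt;
                uaddr += tcnt;
                cp += tcnt;
                entry_end += 0x2000;            /* assume the next entry is adjacent */
        }
        return (0);
}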
 3290 
 3291 /*
 3292  * Performs the copy-on-write operations necessary to allow the virtual copies
 3293  * into user space to work.  This has to be called for write(2) system calls
 3294  * from other processes, file unlinking, and file size shrinkage.
 3295  */
 3296 void
 3297 vm_freeze_copyopts(object, froma, toa)
 3298         vm_object_t object;
 3299         vm_pindex_t froma, toa;
 3300 {
 3301         int rv;
 3302         vm_object_t robject;
 3303         vm_pindex_t idx;
 3304 
 3305         if ((object == NULL) ||
 3306                 ((object->flags & OBJ_OPT) == 0))
 3307                 return;
 3308 
 3309         if (object->shadow_count > object->ref_count)
 3310                 panic("vm_freeze_copyopts: sc > rc");
 3311 
 3312         while((robject = LIST_FIRST(&object->shadow_head)) != NULL) {
 3313                 vm_pindex_t bo_pindex;
 3314                 vm_page_t m_in, m_out;
 3315 
 3316                 bo_pindex = OFF_TO_IDX(robject->backing_object_offset);
 3317 
 3318                 vm_object_reference(robject);
 3319 
 3320                 vm_object_pip_wait(robject, "objfrz");
 3321 
 3322                 if (robject->ref_count == 1) {
 3323                         vm_object_deallocate(robject);
 3324                         continue;
 3325                 }
 3326 
 3327                 vm_object_pip_add(robject, 1);
 3328 
 3329                 for (idx = 0; idx < robject->size; idx++) {
 3330 
 3331                         m_out = vm_page_grab(robject, idx,
 3332                                                 VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 3333 
 3334                         if (m_out->valid == 0) {
 3335                                 m_in = vm_page_grab(object, bo_pindex + idx,
 3336                                                 VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 3337                                 if (m_in->valid == 0) {
 3338                                         rv = vm_pager_get_pages(object, &m_in, 1, 0);
 3339                                         if (rv != VM_PAGER_OK) {
 3340                                                 printf("vm_freeze_copyopts: cannot read page from file: %lx\n", (long)m_in->pindex);
 3341                                                 continue;
 3342                                         }
 3343                                         vm_page_deactivate(m_in);
 3344                                 }
 3345 
 3346                                 vm_page_protect(m_in, VM_PROT_NONE);
 3347                                 pmap_copy_page(VM_PAGE_TO_PHYS(m_in), VM_PAGE_TO_PHYS(m_out));
 3348                                 m_out->valid = m_in->valid;
 3349                                 vm_page_dirty(m_out);
 3350                                 vm_page_activate(m_out);
 3351                                 vm_page_wakeup(m_in);
 3352                         }
 3353                         vm_page_wakeup(m_out);
 3354                 }
 3355 
 3356                 object->shadow_count--;
 3357                 object->ref_count--;
 3358                 LIST_REMOVE(robject, shadow_list);
 3359                 robject->backing_object = NULL;
 3360                 robject->backing_object_offset = 0;
 3361 
 3362                 vm_object_pip_wakeup(robject);
 3363                 vm_object_deallocate(robject);
 3364         }
 3365 
 3366         vm_object_clear_flag(object, OBJ_OPT);
 3367 }
 3368 
 3369 #include "opt_ddb.h"
 3370 #ifdef DDB
 3371 #include <sys/kernel.h>
 3372 
 3373 #include <ddb/ddb.h>
 3374 
 3375 /*
 3376  *      vm_map_print:   [ debug ]
 3377  */
 3378 DB_SHOW_COMMAND(map, vm_map_print)
 3379 {
 3380         static int nlines;
 3381         /* XXX convert args. */
 3382         vm_map_t map = (vm_map_t)addr;
 3383         boolean_t full = have_addr;
 3384 
 3385         vm_map_entry_t entry;
 3386 
 3387         db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n",
 3388             (void *)map,
 3389             (void *)map->pmap, map->nentries, map->timestamp);
 3390         nlines++;
 3391 
 3392         if (!full && db_indent)
 3393                 return;
 3394 
 3395         db_indent += 2;
 3396         for (entry = map->header.next; entry != &map->header;
 3397             entry = entry->next) {
 3398                 db_iprintf("map entry %p: start=%p, end=%p\n",
 3399                     (void *)entry, (void *)entry->start, (void *)entry->end);
 3400                 nlines++;
 3401                 {
 3402                         static char *inheritance_name[4] =
 3403                         {"share", "copy", "none", "donate_copy"};
 3404 
 3405                         db_iprintf(" prot=%x/%x/%s",
 3406                             entry->protection,
 3407                             entry->max_protection,
 3408                             inheritance_name[(int)(unsigned char)entry->inheritance]);
 3409                         if (entry->wired_count != 0)
 3410                                 db_printf(", wired");
 3411                 }
 3412                 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
 3413                         /* XXX no %qd in kernel.  Truncate entry->offset. */
 3414                         db_printf(", share=%p, offset=0x%lx\n",
 3415                             (void *)entry->object.sub_map,
 3416                             (long)entry->offset);
 3417                         nlines++;
 3418                         if ((entry->prev == &map->header) ||
 3419                             (entry->prev->object.sub_map !=
 3420                                 entry->object.sub_map)) {
 3421                                 db_indent += 2;
 3422                                 vm_map_print((db_expr_t)(intptr_t)
 3423                                              entry->object.sub_map,
 3424                                              full, 0, (char *)0);
 3425                                 db_indent -= 2;
 3426                         }
 3427                 } else {
 3428                         /* XXX no %qd in kernel.  Truncate entry->offset. */
 3429                         db_printf(", object=%p, offset=0x%lx",
 3430                             (void *)entry->object.vm_object,
 3431                             (long)entry->offset);
 3432                         if (entry->eflags & MAP_ENTRY_COW)
 3433                                 db_printf(", copy (%s)",
 3434                                     (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
 3435                         db_printf("\n");
 3436                         nlines++;
 3437 
 3438                         if ((entry->prev == &map->header) ||
 3439                             (entry->prev->object.vm_object !=
 3440                                 entry->object.vm_object)) {
 3441                                 db_indent += 2;
 3442                                 vm_object_print((db_expr_t)(intptr_t)
 3443                                                 entry->object.vm_object,
 3444                                                 full, 0, (char *)0);
 3445                                 nlines += 4;
 3446                                 db_indent -= 2;
 3447                         }
 3448                 }
 3449         }
 3450         db_indent -= 2;
 3451         if (db_indent == 0)
 3452                 nlines = 0;
 3453 }
 3454 
 3455 
 3456 DB_SHOW_COMMAND(procvm, procvm)
 3457 {
 3458         struct proc *p;
 3459 
 3460         if (have_addr) {
 3461                 p = (struct proc *) addr;
 3462         } else {
 3463                 p = curproc;
 3464         }
 3465 
 3466         db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
 3467             (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
 3468             (void *)vmspace_pmap(p->p_vmspace));
 3469 
 3470         vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
 3471 }
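/*
 * The DB_SHOW_COMMAND definitions above make these routines available
 * from the in-kernel debugger as "show map" and "show procvm".  An
 * illustrative session (all addresses and counts made up) might look
 * like:
 *
 *      db> show procvm
 *      p = 0xc16c8000, vmspace = 0xc16b2d00, map = 0xc16b2d00, pmap = 0xc16b2de4
 *      db> show map 0xc16b2d00
 *      Task map 0xc16b2d00: pmap=0xc16b2de4, nentries=24, version=103
 */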
 3472 
 3473 #endif /* DDB */
