FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_map.c


    1 /*
    2  * Copyright (c) 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * The Mach Operating System project at Carnegie-Mellon University.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. All advertising materials mentioning features or use of this software
   17  *    must display the following acknowledgement:
   18  *      This product includes software developed by the University of
   19  *      California, Berkeley and its contributors.
   20  * 4. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      from: @(#)vm_map.c      8.3 (Berkeley) 1/12/94
   37  *
   38  *
   39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   40  * All rights reserved.
   41  *
   42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
   43  *
   44  * Permission to use, copy, modify and distribute this software and
   45  * its documentation is hereby granted, provided that both the copyright
   46  * notice and this permission notice appear in all copies of the
   47  * software, derivative works or modified versions, and any portions
   48  * thereof, and that both notices appear in supporting documentation.
   49  *
   50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   53  *
   54  * Carnegie Mellon requests users of this software to return to
   55  *
   56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   57  *  School of Computer Science
   58  *  Carnegie Mellon University
   59  *  Pittsburgh PA 15213-3890
   60  *
   61  * any improvements or extensions that they make and grant Carnegie the
   62  * rights to redistribute these changes.
   63  *
   64  * $FreeBSD$
   65  */
   66 
   67 /*
   68  *      Virtual memory mapping module.
   69  */
   70 
   71 #include <sys/param.h>
   72 #include <sys/systm.h>
   73 #include <sys/malloc.h>
   74 #include <sys/proc.h>
   75 #include <sys/vmmeter.h>
   76 #include <sys/mman.h>
   77 #include <sys/vnode.h>
   78 #include <sys/resourcevar.h>
   79 
   80 #include <vm/vm.h>
   81 #include <vm/vm_param.h>
   82 #include <vm/vm_prot.h>
   83 #include <vm/vm_inherit.h>
   84 #include <sys/lock.h>
   85 #include <vm/pmap.h>
   86 #include <vm/vm_map.h>
   87 #include <vm/vm_page.h>
   88 #include <vm/vm_object.h>
   89 #include <vm/vm_pager.h>
   90 #include <vm/vm_kern.h>
   91 #include <vm/vm_extern.h>
   92 #include <vm/default_pager.h>
   93 #include <vm/swap_pager.h>
   94 #include <vm/vm_zone.h>
   95 
   96 /*
   97  *      Virtual memory maps provide for the mapping, protection,
   98  *      and sharing of virtual memory objects.  In addition,
   99  *      this module provides for an efficient virtual copy of
  100  *      memory from one map to another.
  101  *
  102  *      Synchronization is required prior to most operations.
  103  *
  104  *      Maps consist of an ordered doubly-linked list of simple
  105  *      entries; a single hint is used to speed up lookups.
  106  *
  107  *      In order to properly represent the sharing of virtual
  108  *      memory regions among maps, the map structure is bi-level.
  109  *      Top-level ("address") maps refer to regions of sharable
  110  *      virtual memory.  These regions are implemented as
  111  *      ("sharing") maps, which then refer to the actual virtual
  112  *      memory objects.  When two address maps "share" memory,
  113  *      their top-level maps both have references to the same
  114  *      sharing map.  When memory is virtual-copied from one
  115  *      address map to another, the references in the sharing
  116  *      maps are actually copied -- no copying occurs at the
  117  *      virtual memory object level.
  118  *
   119  *      Since portions of maps are specified by start/end addresses,
  120  *      which may not align with existing map entries, all
  121  *      routines merely "clip" entries to these start/end values.
  122  *      [That is, an entry is split into two, bordering at a
  123  *      start or end value.]  Note that these clippings may not
  124  *      always be necessary (as the two resulting entries are then
  125  *      not changed); however, the clipping is done for convenience.
  126  *
  127  *      As mentioned above, virtual copy operations are performed
  128  *      by copying VM object references from one sharing map to
  129  *      another, and then marking both regions as copy-on-write.
  130  *      It is important to note that only one writeable reference
  131  *      to a VM object region exists in any map -- this means that
  132  *      shadow object creation can be delayed until a write operation
  133  *      occurs.
  134  */
  135 
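/*
 * Editorial illustration (not part of the original file): the
 * "lookup, then clip" idiom that the comment above describes and that
 * most routines below (vm_map_protect, vm_map_inherit, vm_map_madvise)
 * follow.  A hedged sketch only; "map" is assumed to be locked and
 * [start, end) to be a range within it.
 *
 *	vm_map_entry_t entry;
 *
 *	if (vm_map_lookup_entry(map, start, &entry))
 *		vm_map_clip_start(map, entry, start);
 *	else
 *		entry = entry->next;
 *	while ((entry != &map->header) && (entry->start < end)) {
 *		vm_map_clip_end(map, entry, end);
 *		... operate on the now exactly-bounded entry ...
 *		entry = entry->next;
 *	}
 */
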
  136 /*
  137  *      vm_map_startup:
  138  *
  139  *      Initialize the vm_map module.  Must be called before
  140  *      any other vm_map routines.
  141  *
  142  *      Map and entry structures are allocated from the general
  143  *      purpose memory pool with some exceptions:
  144  *
  145  *      - The kernel map and kmem submap are allocated statically.
  146  *      - Kernel map entries are allocated out of a static pool.
  147  *
  148  *      These restrictions are necessary since malloc() uses the
  149  *      maps and requires map entries.
  150  */
  151 
  152 extern char kstack[];
  153 extern int inmprotect;
  154 
  155 static struct vm_zone kmapentzone_store, mapentzone_store, mapzone_store;
  156 static vm_zone_t mapentzone, kmapentzone, mapzone, vmspace_zone;
  157 static struct vm_object kmapentobj, mapentobj, mapobj;
  158 #define MAP_ENTRY_INIT  128
  159 static struct vm_map_entry map_entry_init[MAX_MAPENT];
  160 static struct vm_map_entry kmap_entry_init[MAX_KMAPENT];
  161 static struct vm_map map_init[MAX_KMAP];
  162 
  163 static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t));
  164 static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t));
  165 static vm_map_entry_t vm_map_entry_create __P((vm_map_t));
  166 static void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t));
  167 static void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t));
  168 static void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t));
  169 static void vm_map_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t,
  170                 vm_map_entry_t));
  171 static void vm_map_split __P((vm_map_entry_t));
  172 
  173 void
  174 vm_map_startup()
  175 {
  176         mapzone = &mapzone_store;
  177         zbootinit(mapzone, "MAP", sizeof (struct vm_map),
  178                 map_init, MAX_KMAP);
  179         kmapentzone = &kmapentzone_store;
  180         zbootinit(kmapentzone, "KMAP ENTRY", sizeof (struct vm_map_entry),
  181                 kmap_entry_init, MAX_KMAPENT);
  182         mapentzone = &mapentzone_store;
  183         zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry),
  184                 map_entry_init, MAX_MAPENT);
  185 }
  186 
  187 /*
  188  * Allocate a vmspace structure, including a vm_map and pmap,
  189  * and initialize those structures.  The refcnt is set to 1.
  190  * The remaining fields must be initialized by the caller.
  191  */
  192 struct vmspace *
  193 vmspace_alloc(min, max)
  194         vm_offset_t min, max;
  195 {
  196         struct vmspace *vm;
  197 
  198         vm = zalloc(vmspace_zone);
  199         bzero(&vm->vm_map, sizeof vm->vm_map);
  200         vm_map_init(&vm->vm_map, min, max);
  201         pmap_pinit(&vm->vm_pmap);
  202         vm->vm_map.pmap = &vm->vm_pmap;         /* XXX */
  203         vm->vm_refcnt = 1;
  204         vm->vm_shm = NULL;
  205         return (vm);
  206 }
  207 
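/*
 * Editorial note (not part of the original file): a minimal usage
 * sketch.  The bounds shown are assumptions for illustration; the
 * fork and exec paths actually pass the bounds of the map being
 * duplicated or replaced.
 *
 *	struct vmspace *vm;
 *
 *	vm = vmspace_alloc(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
 *	(vm->vm_refcnt starts at 1; drop it later with vmspace_free(vm))
 */
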
  208 void
  209 vm_init2(void) {
  210         zinitna(kmapentzone, &kmapentobj,
  211                 NULL, 0, cnt.v_page_count / 4, ZONE_INTERRUPT, 1);
  212         zinitna(mapentzone, &mapentobj,
  213                 NULL, 0, 0, 0, 1);
  214         zinitna(mapzone, &mapobj,
  215                 NULL, 0, 0, 0, 1);
  216         vmspace_zone = zinit("VMSPACE", sizeof (struct vmspace), 0, 0, 3);
  217         pmap_init2();
  218         vm_object_init2();
  219 }
  220 
  221 void
  222 vmspace_free(vm)
  223         struct vmspace *vm;
  224 {
  225 
  226         if (vm->vm_refcnt == 0)
  227                 panic("vmspace_free: attempt to free already freed vmspace");
  228 
  229         if (--vm->vm_refcnt == 0) {
  230 
  231                 /*
  232                  * Lock the map, to wait out all other references to it.
  233                  * Delete all of the mappings and pages they hold, then call
  234                  * the pmap module to reclaim anything left.
  235                  */
  236                 vm_map_lock(&vm->vm_map);
  237                 (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
  238                     vm->vm_map.max_offset);
  239                 vm_map_unlock(&vm->vm_map);
  240 
  241                 pmap_release(&vm->vm_pmap);
  242                 zfree(vmspace_zone, vm);
  243         }
  244 }
  245 
  246 /*
  247  *      vm_map_create:
  248  *
  249  *      Creates and returns a new empty VM map with
  250  *      the given physical map structure, and having
  251  *      the given lower and upper address bounds.
  252  */
  253 vm_map_t
  254 vm_map_create(pmap, min, max)
  255         pmap_t pmap;
  256         vm_offset_t min, max;
  257 {
  258         vm_map_t result;
  259 
  260         result = zalloc(mapzone);
  261         vm_map_init(result, min, max);
  262         result->pmap = pmap;
  263         return (result);
  264 }
  265 
  266 /*
  267  * Initialize an existing vm_map structure
  268  * such as that in the vmspace structure.
  269  * The pmap is set elsewhere.
  270  */
  271 void
  272 vm_map_init(map, min, max)
  273         struct vm_map *map;
  274         vm_offset_t min, max;
  275 {
  276         map->header.next = map->header.prev = &map->header;
  277         map->nentries = 0;
  278         map->size = 0;
  279         map->is_main_map = TRUE;
  280         map->system_map = 0;
  281         map->min_offset = min;
  282         map->max_offset = max;
  283         map->first_free = &map->header;
  284         map->hint = &map->header;
  285         map->timestamp = 0;
  286         lockinit(&map->lock, PVM, "thrd_sleep", 0, LK_NOPAUSE);
  287 }
  288 
  289 /*
  290  *      vm_map_entry_dispose:   [ internal use only ]
  291  *
  292  *      Inverse of vm_map_entry_create.
  293  */
  294 static void
  295 vm_map_entry_dispose(map, entry)
  296         vm_map_t map;
  297         vm_map_entry_t entry;
  298 {
  299         zfree((map->system_map || !mapentzone) ? kmapentzone : mapentzone, entry);
  300 }
  301 
  302 /*
  303  *      vm_map_entry_create:    [ internal use only ]
  304  *
  305  *      Allocates a VM map entry for insertion.
   306  *      No entry fields are filled in.
  307  */
  308 static vm_map_entry_t
  309 vm_map_entry_create(map)
  310         vm_map_t map;
  311 {
  312         return zalloc((map->system_map || !mapentzone) ? kmapentzone : mapentzone);
  313 }
  314 
  315 /*
  316  *      vm_map_entry_{un,}link:
  317  *
  318  *      Insert/remove entries from maps.
  319  */
  320 #define vm_map_entry_link(map, after_where, entry) \
  321                 { \
  322                 (map)->nentries++; \
  323                 (map)->timestamp++; \
  324                 (entry)->prev = (after_where); \
  325                 (entry)->next = (after_where)->next; \
  326                 (entry)->prev->next = (entry); \
  327                 (entry)->next->prev = (entry); \
  328                 }
  329 #define vm_map_entry_unlink(map, entry) \
  330                 { \
  331                 (map)->nentries--; \
  332                 (map)->timestamp++; \
  333                 (entry)->next->prev = (entry)->prev; \
  334                 (entry)->prev->next = (entry)->next; \
  335                 }
  336 
  337 /*
  338  *      SAVE_HINT:
  339  *
  340  *      Saves the specified entry as the hint for
  341  *      future lookups.
  342  */
  343 #define SAVE_HINT(map,value) \
  344                 (map)->hint = (value);
  345 
  346 /*
  347  *      vm_map_lookup_entry:    [ internal use only ]
  348  *
  349  *      Finds the map entry containing (or
  350  *      immediately preceding) the specified address
  351  *      in the given map; the entry is returned
  352  *      in the "entry" parameter.  The boolean
  353  *      result indicates whether the address is
  354  *      actually contained in the map.
  355  */
  356 boolean_t
  357 vm_map_lookup_entry(map, address, entry)
  358         vm_map_t map;
  359         vm_offset_t address;
  360         vm_map_entry_t *entry;  /* OUT */
  361 {
  362         vm_map_entry_t cur;
  363         vm_map_entry_t last;
  364 
  365         /*
  366          * Start looking either from the head of the list, or from the hint.
  367          */
  368 
  369         cur = map->hint;
  370 
  371         if (cur == &map->header)
  372                 cur = cur->next;
  373 
  374         if (address >= cur->start) {
  375                 /*
  376                  * Go from hint to end of list.
  377                  *
  378                  * But first, make a quick check to see if we are already looking
  379                  * at the entry we want (which is usually the case). Note also
  380                  * that we don't need to save the hint here... it is the same
  381                  * hint (unless we are at the header, in which case the hint
  382                  * didn't buy us anything anyway).
  383                  */
  384                 last = &map->header;
  385                 if ((cur != last) && (cur->end > address)) {
  386                         *entry = cur;
  387                         return (TRUE);
  388                 }
  389         } else {
  390                 /*
  391                  * Go from start to hint, *inclusively*
  392                  */
  393                 last = cur->next;
  394                 cur = map->header.next;
  395         }
  396 
  397         /*
  398          * Search linearly
  399          */
  400 
  401         while (cur != last) {
  402                 if (cur->end > address) {
  403                         if (address >= cur->start) {
  404                                 /*
  405                                  * Save this lookup for future hints, and
  406                                  * return
  407                                  */
  408 
  409                                 *entry = cur;
  410                                 SAVE_HINT(map, cur);
  411                                 return (TRUE);
  412                         }
  413                         break;
  414                 }
  415                 cur = cur->next;
  416         }
  417         *entry = cur->prev;
  418         SAVE_HINT(map, *entry);
  419         return (FALSE);
  420 }
  421 
  422 /*
  423  *      vm_map_insert:
  424  *
  425  *      Inserts the given whole VM object into the target
  426  *      map at the specified address range.  The object's
  427  *      size should match that of the address range.
  428  *
  429  *      Requires that the map be locked, and leaves it so.
  430  */
  431 int
  432 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
  433               vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
  434               int cow)
  435 {
  436         vm_map_entry_t new_entry;
  437         vm_map_entry_t prev_entry;
  438         vm_map_entry_t temp_entry;
  439         vm_object_t prev_object;
  440         u_char protoeflags;
  441 
  442         if ((object != NULL) && (cow & MAP_NOFAULT)) {
  443                 panic("vm_map_insert: paradoxical MAP_NOFAULT request");
  444         }
  445 
  446         /*
  447          * Check that the start and end points are not bogus.
  448          */
  449 
  450         if ((start < map->min_offset) || (end > map->max_offset) ||
  451             (start >= end))
  452                 return (KERN_INVALID_ADDRESS);
  453 
  454         /*
  455          * Find the entry prior to the proposed starting address; if it's part
  456          * of an existing entry, this range is bogus.
  457          */
  458 
  459         if (vm_map_lookup_entry(map, start, &temp_entry))
  460                 return (KERN_NO_SPACE);
  461 
  462         prev_entry = temp_entry;
  463 
  464         /*
  465          * Assert that the next entry doesn't overlap the end point.
  466          */
  467 
  468         if ((prev_entry->next != &map->header) &&
  469             (prev_entry->next->start < end))
  470                 return (KERN_NO_SPACE);
  471 
  472         protoeflags = 0;
  473         if (cow & MAP_COPY_NEEDED)
  474                 protoeflags |= MAP_ENTRY_NEEDS_COPY;
  475 
  476         if (cow & MAP_COPY_ON_WRITE)
  477                 protoeflags |= MAP_ENTRY_COW;
  478 
  479         if (cow & MAP_NOFAULT)
  480                 protoeflags |= MAP_ENTRY_NOFAULT;
  481 
  482         /*
  483          * See if we can avoid creating a new entry by extending one of our
  484          * neighbors.  Or at least extend the object.
  485          */
  486 
  487         if (object != NULL) {
  488                 if ((object->ref_count > 1) || (object->shadow_count != 0)) {
  489                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
  490                 } else {
  491                         vm_object_set_flag(object, OBJ_ONEMAPPING);
  492                 }
  493         }
  494         else if ((prev_entry != &map->header) &&
  495             (( prev_entry->eflags & (MAP_ENTRY_IS_A_MAP | MAP_ENTRY_IS_SUB_MAP)) == 0) &&
  496                 ((prev_entry->object.vm_object == NULL) ||
  497                         (prev_entry->object.vm_object->type == OBJT_DEFAULT)) &&
  498             (prev_entry->end == start) &&
  499             (prev_entry->wired_count == 0)) {
  500                 
  501 
  502                 if ((protoeflags == prev_entry->eflags) &&
  503                     ((cow & MAP_NOFAULT) ||
  504                      vm_object_coalesce(prev_entry->object.vm_object,
  505                                         OFF_TO_IDX(prev_entry->offset),
  506                                         (vm_size_t) (prev_entry->end - prev_entry->start),
  507                                         (vm_size_t) (end - prev_entry->end)))) {
  508 
  509                         /*
  510                          * Coalesced the two objects.  Can we extend the
  511                          * previous map entry to include the new range?
  512                          */
  513                         if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
  514                             (prev_entry->protection == prot) &&
  515                             (prev_entry->max_protection == max)) {
  516 
  517                                 map->size += (end - prev_entry->end);
  518                                 prev_entry->end = end;
  519                                 if ((cow & MAP_NOFAULT) == 0) {
  520                                         prev_object = prev_entry->object.vm_object;
  521                                         default_pager_convert_to_swapq(prev_object);
  522                                 }
  523                                 return (KERN_SUCCESS);
  524                         }
  525                         else {
  526                                 object = prev_entry->object.vm_object;
  527                                 offset = prev_entry->offset + (prev_entry->end -
  528                                                                prev_entry->start);
  529 
  530                                 vm_object_reference(object);
  531                         }
  532                 }
  533         }
  534 
  535         /*
  536          * Create a new entry
  537          */
  538 
  539         new_entry = vm_map_entry_create(map);
  540         new_entry->start = start;
  541         new_entry->end = end;
  542 
  543         new_entry->eflags = protoeflags;
  544         new_entry->object.vm_object = object;
  545         new_entry->offset = offset;
  546         new_entry->avail_ssize = 0;
  547 
  548         if (map->is_main_map) {
  549                 new_entry->inheritance = VM_INHERIT_DEFAULT;
  550                 new_entry->protection = prot;
  551                 new_entry->max_protection = max;
  552                 new_entry->wired_count = 0;
  553         }
  554         /*
  555          * Insert the new entry into the list
  556          */
  557 
  558         vm_map_entry_link(map, prev_entry, new_entry);
  559         map->size += new_entry->end - new_entry->start;
  560 
  561         /*
  562          * Update the free space hint
  563          */
  564         if ((map->first_free == prev_entry) &&
  565                 (prev_entry->end >= new_entry->start))
  566                 map->first_free = new_entry;
  567 
  568         default_pager_convert_to_swapq(object);
  569         return (KERN_SUCCESS);
  570 }
  571 
  572 /*
  573  * Find sufficient space for `length' bytes in the given map, starting at
  574  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
  575  */
  576 int
  577 vm_map_findspace(map, start, length, addr)
  578         vm_map_t map;
  579         vm_offset_t start;
  580         vm_size_t length;
  581         vm_offset_t *addr;
  582 {
  583         vm_map_entry_t entry, next;
  584         vm_offset_t end;
  585 
  586         if (start < map->min_offset)
  587                 start = map->min_offset;
  588         if (start > map->max_offset)
  589                 return (1);
  590 
  591         /*
  592          * Look for the first possible address; if there's already something
  593          * at this address, we have to start after it.
  594          */
  595         if (start == map->min_offset) {
  596                 if ((entry = map->first_free) != &map->header)
  597                         start = entry->end;
  598         } else {
  599                 vm_map_entry_t tmp;
  600 
  601                 if (vm_map_lookup_entry(map, start, &tmp))
  602                         start = tmp->end;
  603                 entry = tmp;
  604         }
  605 
  606         /*
  607          * Look through the rest of the map, trying to fit a new region in the
  608          * gap between existing regions, or after the very last region.
  609          */
  610         for (;; start = (entry = next)->end) {
  611                 /*
  612                  * Find the end of the proposed new region.  Be sure we didn't
  613                  * go beyond the end of the map, or wrap around the address;
  614                  * if so, we lose.  Otherwise, if this is the last entry, or
  615                  * if the proposed new region fits before the next entry, we
  616                  * win.
  617                  */
  618                 end = start + length;
  619                 if (end > map->max_offset || end < start)
  620                         return (1);
  621                 next = entry->next;
  622                 if (next == &map->header || next->start >= end)
  623                         break;
  624         }
  625         SAVE_HINT(map, entry);
  626         *addr = start;
  627         if (map == kernel_map) {
  628                 vm_offset_t ksize;
  629                 if ((ksize = round_page(start + length)) > kernel_vm_end) {
  630                         pmap_growkernel(ksize);
  631                 }
  632         }
  633         return (0);
  634 }
  635 
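/*
 * Editorial illustration (not part of the original file): the
 * findspace-then-insert pattern used for wired kernel allocations
 * (compare kmem_alloc() in vm_kern.c).  A hedged sketch; "size" is
 * assumed to be page-rounded and most error handling is elided.
 *
 *	vm_offset_t addr, offset;
 *
 *	vm_map_lock(kernel_map);
 *	if (vm_map_findspace(kernel_map, vm_map_min(kernel_map), size, &addr)) {
 *		vm_map_unlock(kernel_map);
 *		return (0);
 *	}
 *	offset = addr - VM_MIN_KERNEL_ADDRESS;
 *	vm_object_reference(kernel_object);
 *	vm_map_insert(kernel_map, kernel_object, offset, addr, addr + size,
 *	    VM_PROT_ALL, VM_PROT_ALL, 0);
 *	vm_map_unlock(kernel_map);
 */
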
  636 /*
  637  *      vm_map_find finds an unallocated region in the target address
  638  *      map with the given length.  The search is defined to be
  639  *      first-fit from the specified address; the region found is
  640  *      returned in the same parameter.
  641  *
  642  */
  643 int
  644 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
  645             vm_offset_t *addr,  /* IN/OUT */
  646             vm_size_t length, boolean_t find_space, vm_prot_t prot,
  647             vm_prot_t max, int cow)
  648 {
  649         vm_offset_t start;
  650         int result, s = 0;
  651 
  652         start = *addr;
  653 
  654         if (map == kmem_map || map == mb_map)
  655                 s = splvm();
  656 
  657         vm_map_lock(map);
  658         if (find_space) {
  659                 if (vm_map_findspace(map, start, length, addr)) {
  660                         vm_map_unlock(map);
  661                         if (map == kmem_map || map == mb_map)
  662                                 splx(s);
  663                         return (KERN_NO_SPACE);
  664                 }
  665                 start = *addr;
  666         }
  667         result = vm_map_insert(map, object, offset,
  668                 start, start + length, prot, max, cow);
  669         vm_map_unlock(map);
  670 
  671         if (map == kmem_map || map == mb_map)
  672                 splx(s);
  673 
  674         return (result);
  675 }
  676 
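/*
 * Editorial illustration (not part of the original file): a typical
 * first-fit allocation in the style of kmem_alloc_pageable() in
 * vm_kern.c.  A hedged sketch; "size" is assumed to be page-rounded.
 *
 *	vm_offset_t addr;
 *
 *	addr = vm_map_min(kernel_map);
 *	if (vm_map_find(kernel_map, NULL, (vm_ooffset_t) 0, &addr,
 *	    size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0) != KERN_SUCCESS)
 *		return (0);
 */
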
  677 /*
  678  *      vm_map_simplify_entry:
  679  *
  680  *      Simplify the given map entry by merging with either neighbor.
  681  */
  682 void
  683 vm_map_simplify_entry(map, entry)
  684         vm_map_t map;
  685         vm_map_entry_t entry;
  686 {
  687         vm_map_entry_t next, prev;
  688         vm_size_t prevsize, esize;
  689 
  690         if (entry->eflags & (MAP_ENTRY_IS_SUB_MAP|MAP_ENTRY_IS_A_MAP))
  691                 return;
  692 
  693         prev = entry->prev;
  694         if (prev != &map->header) {
  695                 prevsize = prev->end - prev->start;
  696                 if ( (prev->end == entry->start) &&
  697                      (prev->object.vm_object == entry->object.vm_object) &&
  698                      (!prev->object.vm_object ||
  699                         (prev->offset + prevsize == entry->offset)) &&
  700                      (prev->eflags == entry->eflags) &&
  701                      (prev->protection == entry->protection) &&
  702                      (prev->max_protection == entry->max_protection) &&
  703                      (prev->inheritance == entry->inheritance) &&
  704                      (prev->wired_count == entry->wired_count)) {
  705                         if (map->first_free == prev)
  706                                 map->first_free = entry;
  707                         if (map->hint == prev)
  708                                 map->hint = entry;
  709                         vm_map_entry_unlink(map, prev);
  710                         entry->start = prev->start;
  711                         entry->offset = prev->offset;
  712                         if (prev->object.vm_object)
  713                                 vm_object_deallocate(prev->object.vm_object);
  714                         vm_map_entry_dispose(map, prev);
  715                 }
  716         }
  717 
  718         next = entry->next;
  719         if (next != &map->header) {
  720                 esize = entry->end - entry->start;
  721                 if ((entry->end == next->start) &&
  722                     (next->object.vm_object == entry->object.vm_object) &&
  723                      (!entry->object.vm_object ||
  724                         (entry->offset + esize == next->offset)) &&
  725                     (next->eflags == entry->eflags) &&
  726                     (next->protection == entry->protection) &&
  727                     (next->max_protection == entry->max_protection) &&
  728                     (next->inheritance == entry->inheritance) &&
  729                     (next->wired_count == entry->wired_count)) {
  730                         if (map->first_free == next)
  731                                 map->first_free = entry;
  732                         if (map->hint == next)
  733                                 map->hint = entry;
  734                         vm_map_entry_unlink(map, next);
  735                         entry->end = next->end;
  736                         if (next->object.vm_object)
  737                                 vm_object_deallocate(next->object.vm_object);
  738                         vm_map_entry_dispose(map, next);
  739                 }
  740         }
  741 }
  742 /*
  743  *      vm_map_clip_start:      [ internal use only ]
  744  *
  745  *      Asserts that the given entry begins at or after
  746  *      the specified address; if necessary,
  747  *      it splits the entry into two.
  748  */
  749 #define vm_map_clip_start(map, entry, startaddr) \
  750 { \
  751         if (startaddr > entry->start) \
  752                 _vm_map_clip_start(map, entry, startaddr); \
  753         else if (entry->object.vm_object && (entry->object.vm_object->ref_count == 1)) \
  754                 vm_object_set_flag(entry->object.vm_object, OBJ_ONEMAPPING); \
  755 }
  756 
  757 /*
  758  *      This routine is called only when it is known that
  759  *      the entry must be split.
  760  */
  761 static void
  762 _vm_map_clip_start(map, entry, start)
  763         vm_map_t map;
  764         vm_map_entry_t entry;
  765         vm_offset_t start;
  766 {
  767         vm_map_entry_t new_entry;
  768 
  769         /*
  770          * Split off the front portion -- note that we must insert the new
  771          * entry BEFORE this one, so that this entry has the specified
  772          * starting address.
  773          */
  774 
  775         vm_map_simplify_entry(map, entry);
  776 
  777         /*
  778          * If there is no object backing this entry, we might as well create
  779          * one now.  If we defer it, an object can get created after the map
  780          * is clipped, and individual objects will be created for the split-up
  781          * map.  This is a bit of a hack, but is also about the best place to
  782          * put this improvement.
  783          */
  784 
  785         if (entry->object.vm_object == NULL) {
  786                 vm_object_t object;
  787                 object = vm_object_allocate(OBJT_DEFAULT,
  788                                 atop(entry->end - entry->start));
  789                 entry->object.vm_object = object;
  790                 entry->offset = 0;
  791         }
  792 
  793         new_entry = vm_map_entry_create(map);
  794         *new_entry = *entry;
  795 
  796         new_entry->end = start;
  797         entry->offset += (start - entry->start);
  798         entry->start = start;
  799 
  800         vm_map_entry_link(map, entry->prev, new_entry);
  801 
  802         if ((entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) == 0) {
  803                 if (new_entry->object.vm_object->ref_count == 1)
  804                         vm_object_set_flag(new_entry->object.vm_object,
  805                                            OBJ_ONEMAPPING);
  806                 vm_object_reference(new_entry->object.vm_object);
  807         }
  808 }
  809 
  810 /*
  811  *      vm_map_clip_end:        [ internal use only ]
  812  *
  813  *      Asserts that the given entry ends at or before
  814  *      the specified address; if necessary,
  815  *      it splits the entry into two.
  816  */
  817 
  818 #define vm_map_clip_end(map, entry, endaddr) \
  819 { \
  820         if (endaddr < entry->end) \
  821                 _vm_map_clip_end(map, entry, endaddr); \
  822         else if (entry->object.vm_object && (entry->object.vm_object->ref_count == 1)) \
  823                 vm_object_set_flag(entry->object.vm_object, OBJ_ONEMAPPING); \
  824 }
  825 
  826 /*
  827  *      This routine is called only when it is known that
  828  *      the entry must be split.
  829  */
  830 static void
  831 _vm_map_clip_end(map, entry, end)
  832         vm_map_t map;
  833         vm_map_entry_t entry;
  834         vm_offset_t end;
  835 {
  836         vm_map_entry_t new_entry;
  837 
  838         /*
  839          * If there is no object backing this entry, we might as well create
  840          * one now.  If we defer it, an object can get created after the map
  841          * is clipped, and individual objects will be created for the split-up
  842          * map.  This is a bit of a hack, but is also about the best place to
  843          * put this improvement.
  844          */
  845 
  846         if (entry->object.vm_object == NULL) {
  847                 vm_object_t object;
  848                 object = vm_object_allocate(OBJT_DEFAULT,
  849                                 atop(entry->end - entry->start));
  850                 entry->object.vm_object = object;
  851                 entry->offset = 0;
  852         }
  853 
  854         /*
  855          * Create a new entry and insert it AFTER the specified entry
  856          */
  857 
  858         new_entry = vm_map_entry_create(map);
  859         *new_entry = *entry;
  860 
  861         new_entry->start = entry->end = end;
  862         new_entry->offset += (end - entry->start);
  863 
  864         vm_map_entry_link(map, entry, new_entry);
  865 
  866         if ((entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) == 0) {
  867                 if (new_entry->object.vm_object->ref_count == 1)
  868                         vm_object_set_flag(new_entry->object.vm_object,
  869                                            OBJ_ONEMAPPING);
  870                 vm_object_reference(new_entry->object.vm_object);
  871         }
  872 }
  873 
  874 /*
  875  *      VM_MAP_RANGE_CHECK:     [ internal use only ]
  876  *
  877  *      Asserts that the starting and ending region
  878  *      addresses fall within the valid range of the map.
  879  */
  880 #define VM_MAP_RANGE_CHECK(map, start, end)             \
  881                 {                                       \
  882                 if (start < vm_map_min(map))            \
  883                         start = vm_map_min(map);        \
  884                 if (end > vm_map_max(map))              \
  885                         end = vm_map_max(map);          \
  886                 if (start > end)                        \
  887                         start = end;                    \
  888                 }
  889 
  890 /*
  891  *      vm_map_submap:          [ kernel use only ]
  892  *
  893  *      Mark the given range as handled by a subordinate map.
  894  *
  895  *      This range must have been created with vm_map_find,
  896  *      and no other operations may have been performed on this
  897  *      range prior to calling vm_map_submap.
  898  *
  899  *      Only a limited number of operations can be performed
   900  *      within this range after calling vm_map_submap:
  901  *              vm_fault
  902  *      [Don't try vm_map_copy!]
  903  *
  904  *      To remove a submapping, one must first remove the
  905  *      range from the superior map, and then destroy the
  906  *      submap (if desired).  [Better yet, don't try it.]
  907  */
  908 int
  909 vm_map_submap(map, start, end, submap)
  910         vm_map_t map;
  911         vm_offset_t start;
  912         vm_offset_t end;
  913         vm_map_t submap;
  914 {
  915         vm_map_entry_t entry;
  916         int result = KERN_INVALID_ARGUMENT;
  917 
  918         vm_map_lock(map);
  919 
  920         VM_MAP_RANGE_CHECK(map, start, end);
  921 
  922         if (vm_map_lookup_entry(map, start, &entry)) {
  923                 vm_map_clip_start(map, entry, start);
  924         } else
  925                 entry = entry->next;
  926 
  927         vm_map_clip_end(map, entry, end);
  928 
  929         if ((entry->start == start) && (entry->end == end) &&
  930             ((entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_COW)) == 0) &&
  931             (entry->object.vm_object == NULL)) {
  932                 entry->object.sub_map = submap;
  933                 entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
  934                 result = KERN_SUCCESS;
  935         }
  936         vm_map_unlock(map);
  937 
  938         return (result);
  939 }
  940 
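/*
 * Editorial note (not part of the original file): submaps are normally
 * set up at boot time by kmem_suballoc() in vm_kern.c, roughly along
 * these lines (a hedged sketch; the variable names are illustrative):
 *
 *	vm_offset_t minaddr, maxaddr;
 *	vm_map_t submap;
 *
 *	submap = kmem_suballoc(kernel_map, &minaddr, &maxaddr, size);
 *
 * kmem_suballoc() reserves the range in the parent with vm_map_find(),
 * creates the new map with vm_map_create(), and then marks the range
 * with vm_map_submap().
 */
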
  941 /*
  942  *      vm_map_protect:
  943  *
  944  *      Sets the protection of the specified address
  945  *      region in the target map.  If "set_max" is
  946  *      specified, the maximum protection is to be set;
  947  *      otherwise, only the current protection is affected.
  948  */
  949 int
  950 vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
  951                vm_prot_t new_prot, boolean_t set_max)
  952 {
  953         vm_map_entry_t current;
  954         vm_map_entry_t entry;
  955 
  956         vm_map_lock(map);
  957 
  958         VM_MAP_RANGE_CHECK(map, start, end);
  959 
  960         if (vm_map_lookup_entry(map, start, &entry)) {
  961                 vm_map_clip_start(map, entry, start);
  962         } else {
  963                 entry = entry->next;
  964         }
  965 
  966         /*
  967          * Make a first pass to check for protection violations.
  968          */
  969 
  970         current = entry;
  971         while ((current != &map->header) && (current->start < end)) {
  972                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
  973                         vm_map_unlock(map);
  974                         return (KERN_INVALID_ARGUMENT);
  975                 }
  976                 if ((new_prot & current->max_protection) != new_prot) {
  977                         vm_map_unlock(map);
  978                         return (KERN_PROTECTION_FAILURE);
  979                 }
  980                 current = current->next;
  981         }
  982 
  983         /*
  984          * Go back and fix up protections. [Note that clipping is not
  985          * necessary the second time.]
  986          */
  987 
  988         current = entry;
  989 
  990         while ((current != &map->header) && (current->start < end)) {
  991                 vm_prot_t old_prot;
  992 
  993                 vm_map_clip_end(map, current, end);
  994 
  995                 old_prot = current->protection;
  996                 if (set_max)
  997                         current->protection =
  998                             (current->max_protection = new_prot) &
  999                             old_prot;
 1000                 else
 1001                         current->protection = new_prot;
 1002 
 1003                 /*
 1004                  * Update physical map if necessary. Worry about copy-on-write
 1005                  * here -- CHECK THIS XXX
 1006                  */
 1007 
 1008                 if (current->protection != old_prot) {
 1009 #define MASK(entry)     (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
 1010                                                         VM_PROT_ALL)
 1011 
 1012                         if (current->eflags & MAP_ENTRY_IS_A_MAP) {
 1013                                 vm_map_entry_t share_entry;
 1014                                 vm_offset_t share_end;
 1015 
 1016                                 vm_map_lock(current->object.share_map);
 1017                                 (void) vm_map_lookup_entry(
 1018                                     current->object.share_map,
 1019                                     current->offset,
 1020                                     &share_entry);
 1021                                 share_end = current->offset +
 1022                                     (current->end - current->start);
 1023                                 while ((share_entry !=
 1024                                         &current->object.share_map->header) &&
 1025                                     (share_entry->start < share_end)) {
 1026 
 1027                                         pmap_protect(map->pmap,
 1028                                             (qmax(share_entry->start,
 1029                                                     current->offset) -
 1030                                                 current->offset +
 1031                                                 current->start),
 1032                                             min(share_entry->end,
 1033                                                 share_end) -
 1034                                             current->offset +
 1035                                             current->start,
 1036                                             current->protection &
 1037                                             MASK(share_entry));
 1038 
 1039                                         share_entry = share_entry->next;
 1040                                 }
 1041                                 vm_map_unlock(current->object.share_map);
 1042                         } else
 1043                                 pmap_protect(map->pmap, current->start,
 1044                                     current->end,
 1045                                     current->protection & MASK(current));
 1046 #undef  MASK
 1047                 }
 1048 
 1049                 vm_map_simplify_entry(map, current);
 1050 
 1051                 current = current->next;
 1052         }
 1053 
 1054         map->timestamp++;
 1055         vm_map_unlock(map);
 1056         return (KERN_SUCCESS);
 1057 }
 1058 
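/*
 * Editorial note (not part of the original file): this is the routine
 * behind mprotect(2).  A hedged sketch of the call made on the
 * process's behalf, where "addr" and "size" are the page-rounded user
 * arguments and "prot" the requested protection:
 *
 *	rv = vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size,
 *	    prot, FALSE);
 */
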
 1059 /*
 1060  *      vm_map_madvise:
 1061  *
  1062  *      This routine traverses a process's map, handling the madvise
 1063  *      system call.
 1064  */
 1065 void
 1066 vm_map_madvise(map, pmap, start, end, advise)
 1067         vm_map_t map;
 1068         pmap_t pmap;
 1069         vm_offset_t start, end;
 1070         int advise;
 1071 {
 1072         vm_map_entry_t current;
 1073         vm_map_entry_t entry;
 1074 
 1075         vm_map_lock(map);
 1076 
 1077         VM_MAP_RANGE_CHECK(map, start, end);
 1078 
 1079         if (vm_map_lookup_entry(map, start, &entry)) {
 1080                 vm_map_clip_start(map, entry, start);
 1081         } else
 1082                 entry = entry->next;
 1083 
 1084         for(current = entry;
 1085                 (current != &map->header) && (current->start < end);
 1086                 current = current->next) {
 1087                 vm_size_t size;
 1088 
 1089                 if (current->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) {
 1090                         continue;
 1091                 }
 1092 
 1093                 vm_map_clip_end(map, current, end);
 1094                 size = current->end - current->start;
 1095 
 1096                 /*
 1097                  * Create an object if needed
 1098                  */
 1099                 if (current->object.vm_object == NULL) {
 1100                         vm_object_t object;
 1101                         if ((advise == MADV_FREE) || (advise == MADV_DONTNEED))
 1102                                 continue;
 1103                         object = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(size));
 1104                         current->object.vm_object = object;
 1105                         current->offset = 0;
 1106                 }
 1107 
 1108                 switch (advise) {
 1109         case MADV_NORMAL:
 1110                         current->object.vm_object->behavior = OBJ_NORMAL;
 1111                         break;
 1112         case MADV_SEQUENTIAL:
 1113                         current->object.vm_object->behavior = OBJ_SEQUENTIAL;
 1114                         break;
 1115         case MADV_RANDOM:
 1116                         current->object.vm_object->behavior = OBJ_RANDOM;
 1117                         break;
 1118         /*
 1119          * Right now, we could handle DONTNEED and WILLNEED with common code.
 1120          * They are mostly the same, except for the potential async reads (NYI).
 1121          */
 1122         case MADV_FREE:
 1123         case MADV_DONTNEED:
 1124                         {
 1125                                 vm_pindex_t pindex;
 1126                                 int count;
 1127                                 pindex = OFF_TO_IDX(current->offset);
 1128                                 count = OFF_TO_IDX(size);
 1129                                 /*
 1130                                  * MADV_DONTNEED removes the page from all
 1131                                  * pmaps, so pmap_remove is not necessary.
 1132                                  */
 1133                                 vm_object_madvise(current->object.vm_object,
 1134                                         pindex, count, advise);
 1135                         }
 1136                         break;
 1137 
 1138         case MADV_WILLNEED:
 1139                         {
 1140                                 vm_pindex_t pindex;
 1141                                 int count;
 1142                                 pindex = OFF_TO_IDX(current->offset);
 1143                                 count = OFF_TO_IDX(size);
 1144                                 vm_object_madvise(current->object.vm_object,
 1145                                         pindex, count, advise);
 1146                                 pmap_object_init_pt(pmap, current->start,
 1147                                         current->object.vm_object, pindex,
 1148                                         (count << PAGE_SHIFT), 0);
 1149                         }
 1150                         break;
 1151 
 1152         default:
 1153                         break;
 1154                 }
 1155         }
 1156 
 1157         map->timestamp++;
 1158         vm_map_simplify_entry(map, entry);
 1159         vm_map_unlock(map);
 1160         return;
 1161 }       
 1162 
 1163 
 1164 /*
 1165  *      vm_map_inherit:
 1166  *
 1167  *      Sets the inheritance of the specified address
 1168  *      range in the target map.  Inheritance
 1169  *      affects how the map will be shared with
 1170  *      child maps at the time of vm_map_fork.
 1171  */
 1172 int
 1173 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
 1174                vm_inherit_t new_inheritance)
 1175 {
 1176         vm_map_entry_t entry;
 1177         vm_map_entry_t temp_entry;
 1178 
 1179         switch (new_inheritance) {
 1180         case VM_INHERIT_NONE:
 1181         case VM_INHERIT_COPY:
 1182         case VM_INHERIT_SHARE:
 1183                 break;
 1184         default:
 1185                 return (KERN_INVALID_ARGUMENT);
 1186         }
 1187 
 1188         vm_map_lock(map);
 1189 
 1190         VM_MAP_RANGE_CHECK(map, start, end);
 1191 
 1192         if (vm_map_lookup_entry(map, start, &temp_entry)) {
 1193                 entry = temp_entry;
 1194                 vm_map_clip_start(map, entry, start);
 1195         } else
 1196                 entry = temp_entry->next;
 1197 
 1198         while ((entry != &map->header) && (entry->start < end)) {
 1199                 vm_map_clip_end(map, entry, end);
 1200 
 1201                 entry->inheritance = new_inheritance;
 1202 
 1203                 entry = entry->next;
 1204         }
 1205 
 1206         vm_map_simplify_entry(map, temp_entry);
 1207         map->timestamp++;
 1208         vm_map_unlock(map);
 1209         return (KERN_SUCCESS);
 1210 }
 1211 
 1212 /*
 1213  * Implement the semantics of mlock
 1214  */
 1215 int
 1216 vm_map_user_pageable(map, start, end, new_pageable)
 1217         vm_map_t map;
 1218         vm_offset_t start;
 1219         vm_offset_t end;
 1220         boolean_t new_pageable;
 1221 {
 1222         vm_map_entry_t entry;
 1223         vm_map_entry_t start_entry;
 1224         vm_offset_t estart;
 1225         int rv;
 1226 
 1227         vm_map_lock(map);
 1228         VM_MAP_RANGE_CHECK(map, start, end);
 1229 
 1230         if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
 1231                 vm_map_unlock(map);
 1232                 return (KERN_INVALID_ADDRESS);
 1233         }
 1234 
 1235         if (new_pageable) {
 1236 
 1237                 entry = start_entry;
 1238                 vm_map_clip_start(map, entry, start);
 1239 
 1240                 /*
 1241                  * Now decrement the wiring count for each region. If a region
 1242                  * becomes completely unwired, unwire its physical pages and
 1243                  * mappings.
 1244                  */
 1245                 vm_map_set_recursive(map);
 1246 
 1247                 entry = start_entry;
 1248                 while ((entry != &map->header) && (entry->start < end)) {
 1249                         if (entry->eflags & MAP_ENTRY_USER_WIRED) {
 1250                                 vm_map_clip_end(map, entry, end);
 1251                                 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 1252                                 entry->wired_count--;
 1253                                 if (entry->wired_count == 0)
 1254                                         vm_fault_unwire(map, entry->start, entry->end);
 1255                         }
 1256                         vm_map_simplify_entry(map,entry);
 1257                         entry = entry->next;
 1258                 }
 1259                 vm_map_clear_recursive(map);
 1260         } else {
 1261 
 1262                 entry = start_entry;
 1263 
 1264                 while ((entry != &map->header) && (entry->start < end)) {
 1265 
 1266                         if (entry->eflags & MAP_ENTRY_USER_WIRED) {
 1267                                 entry = entry->next;
 1268                                 continue;
 1269                         }
 1270                         
 1271                         if (entry->wired_count != 0) {
 1272                                 entry->wired_count++;
 1273                                 entry->eflags |= MAP_ENTRY_USER_WIRED;
 1274                                 entry = entry->next;
 1275                                 continue;
 1276                         }
 1277 
 1278                         /* Here on entry being newly wired */
 1279 
 1280                         if ((entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) == 0) {
 1281                                 int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
 1282                                 if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {
 1283 
 1284                                         vm_object_shadow(&entry->object.vm_object,
 1285                                             &entry->offset,
 1286                                             atop(entry->end - entry->start));
 1287                                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 1288 
 1289                                 } else if (entry->object.vm_object == NULL) {
 1290 
 1291                                         entry->object.vm_object =
 1292                                             vm_object_allocate(OBJT_DEFAULT,
 1293                                                 atop(entry->end - entry->start));
 1294                                         entry->offset = (vm_offset_t) 0;
 1295 
 1296                                 }
 1297                                 default_pager_convert_to_swapq(entry->object.vm_object);
 1298                         }
 1299 
 1300                         vm_map_clip_start(map, entry, start);
 1301                         vm_map_clip_end(map, entry, end);
 1302 
 1303                         entry->wired_count++;
 1304                         entry->eflags |= MAP_ENTRY_USER_WIRED;
 1305                         estart = entry->start;
 1306 
 1307                         /* First we need to allow map modifications */
 1308                         vm_map_set_recursive(map);
 1309                         vm_map_lock_downgrade(map);
 1310                         map->timestamp++;
 1311 
 1312                         rv = vm_fault_user_wire(map, entry->start, entry->end);
 1313                         if (rv) {
 1314 
 1315                                 entry->wired_count--;
 1316                                 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 1317 
 1318                                 vm_map_clear_recursive(map);
 1319                                 vm_map_unlock(map);
 1320                                 
 1321                                 (void) vm_map_user_pageable(map, start, entry->start, TRUE);
 1322                                 return rv;
 1323                         }
 1324 
 1325                         vm_map_clear_recursive(map);
 1326                         if (vm_map_lock_upgrade(map)) {
 1327                                 vm_map_lock(map);
 1328                                 if (vm_map_lookup_entry(map, estart, &entry) 
 1329                                     == FALSE) {
 1330                                         vm_map_unlock(map);
 1331                                         (void) vm_map_user_pageable(map,
 1332                                                                     start,
 1333                                                                     estart,
 1334                                                                     TRUE);
 1335                                         return (KERN_INVALID_ADDRESS);
 1336                                 }
 1337                         }
 1338                         vm_map_simplify_entry(map,entry);
 1339                 }
 1340         }
 1341         map->timestamp++;
 1342         vm_map_unlock(map);
 1343         return KERN_SUCCESS;
 1344 }
 1345 
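/*
 * Editorial note (not part of the original file): mlock(2) and
 * munlock(2) are implemented in terms of this routine.  A hedged
 * sketch, with "addr" and "size" the page-rounded user arguments:
 *
 *	error = vm_map_user_pageable(&p->p_vmspace->vm_map,
 *	    addr, addr + size, FALSE);	(FALSE wires, TRUE unwires)
 */
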
 1346 /*
 1347  *      vm_map_pageable:
 1348  *
 1349  *      Sets the pageability of the specified address
 1350  *      range in the target map.  Regions specified
 1351  *      as not pageable require locked-down physical
 1352  *      memory and physical page maps.
 1353  *
 1354  *      The map must not be locked, but a reference
 1355  *      must remain to the map throughout the call.
 1356  */
 1357 int
 1358 vm_map_pageable(map, start, end, new_pageable)
 1359         vm_map_t map;
 1360         vm_offset_t start;
 1361         vm_offset_t end;
 1362         boolean_t new_pageable;
 1363 {
 1364         vm_map_entry_t entry;
 1365         vm_map_entry_t start_entry;
 1366         vm_offset_t failed = 0;
 1367         int rv;
 1368 
 1369         vm_map_lock(map);
 1370 
 1371         VM_MAP_RANGE_CHECK(map, start, end);
 1372 
 1373         /*
 1374          * Only one pageability change may take place at one time, since
 1375          * vm_fault assumes it will be called only once for each
 1376          * wiring/unwiring.  Therefore, we have to make sure we're actually
 1377          * changing the pageability for the entire region.  We do so before
 1378          * making any changes.
 1379          */
 1380 
 1381         if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
 1382                 vm_map_unlock(map);
 1383                 return (KERN_INVALID_ADDRESS);
 1384         }
 1385         entry = start_entry;
 1386 
 1387         /*
 1388          * Actions are rather different for wiring and unwiring, so we have
 1389          * two separate cases.
 1390          */
 1391 
 1392         if (new_pageable) {
 1393 
 1394                 vm_map_clip_start(map, entry, start);
 1395 
 1396                 /*
 1397                  * Unwiring.  First ensure that the range to be unwired is
 1398                  * really wired down and that there are no holes.
 1399                  */
 1400                 while ((entry != &map->header) && (entry->start < end)) {
 1401 
 1402                         if (entry->wired_count == 0 ||
 1403                             (entry->end < end &&
 1404                                 (entry->next == &map->header ||
 1405                                     entry->next->start > entry->end))) {
 1406                                 vm_map_unlock(map);
 1407                                 return (KERN_INVALID_ARGUMENT);
 1408                         }
 1409                         entry = entry->next;
 1410                 }
 1411 
 1412                 /*
 1413                  * Now decrement the wiring count for each region. If a region
 1414                  * becomes completely unwired, unwire its physical pages and
 1415                  * mappings.
 1416                  */
 1417                 vm_map_set_recursive(map);
 1418 
 1419                 entry = start_entry;
 1420                 while ((entry != &map->header) && (entry->start < end)) {
 1421                         vm_map_clip_end(map, entry, end);
 1422 
 1423                         entry->wired_count--;
 1424                         if (entry->wired_count == 0)
 1425                                 vm_fault_unwire(map, entry->start, entry->end);
 1426 
 1427                         entry = entry->next;
 1428                 }
 1429                 vm_map_simplify_entry(map, start_entry);
 1430                 vm_map_clear_recursive(map);
 1431         } else {
 1432                 /*
 1433                  * Wiring.  We must do this in two passes:
 1434                  *
 1435                  * 1.  Holding the write lock, we create any shadow or zero-fill
 1436                  * objects that need to be created. Then we clip each map
 1437                  * entry to the region to be wired and increment its wiring
 1438                  * count.  We create objects before clipping the map entries
 1439                  * to avoid object proliferation.
 1440                  *
 1441                  * 2.  We downgrade to a read lock, and call vm_fault_wire to
 1442                  * fault in the pages for any newly wired area (wired_count is
 1443                  * 1).
 1444                  *
 1445                  * Downgrading to a read lock for vm_fault_wire avoids a possible
 1446                  * deadlock with another process that may have faulted on one
 1447                  * of the pages to be wired (it would mark the page busy,
 1448                  * blocking us, then in turn block on the map lock that we
 1449                  * hold).  Because of problems in the recursive lock package,
 1450                  * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
 1451                  * any actions that require the write lock must be done
 1452                  * beforehand.  Because we keep the read lock on the map, the
 1453                  * copy-on-write status of the entries we modify here cannot
 1454                  * change.
 1455                  */
 1456 
 1457                 /*
 1458                  * Pass 1.
 1459                  */
 1460                 while ((entry != &map->header) && (entry->start < end)) {
 1461                         if (entry->wired_count == 0) {
 1462 
 1463                                 /*
 1464                                  * Perform actions of vm_map_lookup that need
 1465                                  * the write lock on the map: create a shadow
 1466                                  * object for a copy-on-write region, or an
 1467                                  * object for a zero-fill region.
 1468                                  *
 1469                                  * We don't have to do this for entries that
 1470                                  * point to sharing maps, because we won't
 1471                                  * hold the lock on the sharing map.
 1472                                  */
 1473                                 if ((entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) == 0) {
 1474                                         int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
 1475                                         if (copyflag &&
 1476                                             ((entry->protection & VM_PROT_WRITE) != 0)) {
 1477 
 1478                                                 vm_object_shadow(&entry->object.vm_object,
 1479                                                     &entry->offset,
 1480                                                     atop(entry->end - entry->start));
 1481                                                 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 1482                                         } else if (entry->object.vm_object == NULL) {
 1483                                                 entry->object.vm_object =
 1484                                                     vm_object_allocate(OBJT_DEFAULT,
 1485                                                         atop(entry->end - entry->start));
 1486                                                 entry->offset = (vm_offset_t) 0;
 1487                                         }
 1488                                         default_pager_convert_to_swapq(entry->object.vm_object);
 1489                                 }
 1490                         }
 1491                         vm_map_clip_start(map, entry, start);
 1492                         vm_map_clip_end(map, entry, end);
 1493                         entry->wired_count++;
 1494 
 1495                         /*
 1496                          * Check for holes
 1497                          */
 1498                         if (entry->end < end &&
 1499                             (entry->next == &map->header ||
 1500                                 entry->next->start > entry->end)) {
 1501                                 /*
 1502                                  * Found one.  Object creation actions do not
 1503                                  * need to be undone, but the wired counts
 1504                                  * need to be restored.
 1505                                  */
 1506                                 while (entry != &map->header && entry->end > start) {
 1507                                         entry->wired_count--;
 1508                                         entry = entry->prev;
 1509                                 }
 1510                                 map->timestamp++;
 1511                                 vm_map_unlock(map);
 1512                                 return (KERN_INVALID_ARGUMENT);
 1513                         }
 1514                         entry = entry->next;
 1515                 }
 1516 
 1517                 /*
 1518                  * Pass 2.
 1519                  */
 1520 
 1521                 /*
 1522                  * HACK HACK HACK HACK
 1523                  *
 1524                  * If we are wiring in the kernel map or a submap of it,
 1525                  * unlock the map to avoid deadlocks.  We trust that the
 1526                  * kernel is well-behaved, and therefore will not do
 1527                  * anything destructive to this region of the map while
 1528                  * we have it unlocked.  We cannot trust user processes
 1529                  * to do the same.
 1530                  *
 1531                  * HACK HACK HACK HACK
 1532                  */
 1533                 if (vm_map_pmap(map) == kernel_pmap) {
 1534                         vm_map_unlock(map);     /* trust me ... */
 1535                 } else {
 1536                         vm_map_set_recursive(map);
 1537                         vm_map_lock_downgrade(map);
 1538                 }
 1539 
 1540                 rv = 0;
 1541                 entry = start_entry;
 1542                 while (entry != &map->header && entry->start < end) {
 1543                         /*
 1544                          * If vm_fault_wire fails for any page we need to undo
 1545                          * what has been done.  We decrement the wiring count
 1546                          * for those pages which have not yet been wired (now)
 1547                          * and unwire those that have (later).
 1548                          *
 1549                          * XXX this violates the locking protocol on the map,
 1550                          * needs to be fixed.
 1551                          */
 1552                         if (rv)
 1553                                 entry->wired_count--;
 1554                         else if (entry->wired_count == 1) {
 1555                                 rv = vm_fault_wire(map, entry->start, entry->end);
 1556                                 if (rv) {
 1557                                         failed = entry->start;
 1558                                         entry->wired_count--;
 1559                                 }
 1560                         }
 1561                         entry = entry->next;
 1562                 }
 1563 
 1564                 if (vm_map_pmap(map) == kernel_pmap) {
 1565                         vm_map_lock(map);
 1566                 } else {
 1567                         vm_map_clear_recursive(map);
 1568                 }
 1569                 if (rv) {
 1570                         vm_map_unlock(map);
 1571                         (void) vm_map_pageable(map, start, failed, TRUE);
 1572                         return (rv);
 1573                 }
 1574                 vm_map_simplify_entry(map, start_entry);
 1575         }
 1576 
 1577         vm_map_unlock(map);
 1578 
 1579         map->timestamp++;
 1580         return (KERN_SUCCESS);
 1581 }
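
Editorial illustration: a minimal sketch of a hypothetical in-kernel caller of vm_map_pageable() as prototyped above. The names my_map, my_start and my_end are invented for the example; this is a fragment for orientation, not buildable code from this file.

        /*
         * Hypothetical caller sketch (not from vm_map.c).  The map must be
         * unlocked but referenced for the duration of each call.
         */
        int error;

        error = vm_map_pageable(my_map, my_start, my_end, FALSE);  /* wire */
        if (error != KERN_SUCCESS)
                return (error);
        /* ... operate on the wired range ... */
        error = vm_map_pageable(my_map, my_start, my_end, TRUE);   /* unwire */
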
 1582 
 1583 /*
 1584  * vm_map_clean
 1585  *
 1586  * Push any dirty cached pages in the address range to their pager.
 1587  * If syncio is TRUE, dirty pages are written synchronously.
 1588  * If invalidate is TRUE, any cached pages are freed as well.
 1589  *
 1590  * Returns an error if any part of the specified range is not mapped.
 1591  */
 1592 int
 1593 vm_map_clean(map, start, end, syncio, invalidate)
 1594         vm_map_t map;
 1595         vm_offset_t start;
 1596         vm_offset_t end;
 1597         boolean_t syncio;
 1598         boolean_t invalidate;
 1599 {
 1600         vm_map_entry_t current;
 1601         vm_map_entry_t entry;
 1602         vm_size_t size;
 1603         vm_object_t object;
 1604         vm_ooffset_t offset;
 1605 
 1606         vm_map_lock_read(map);
 1607         VM_MAP_RANGE_CHECK(map, start, end);
 1608         if (!vm_map_lookup_entry(map, start, &entry)) {
 1609                 vm_map_unlock_read(map);
 1610                 return (KERN_INVALID_ADDRESS);
 1611         }
 1612         /*
 1613          * Make a first pass to check for holes.
 1614          */
 1615         for (current = entry; current->start < end; current = current->next) {
 1616                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
 1617                         vm_map_unlock_read(map);
 1618                         return (KERN_INVALID_ARGUMENT);
 1619                 }
 1620                 if (end > current->end &&
 1621                     (current->next == &map->header ||
 1622                         current->end != current->next->start)) {
 1623                         vm_map_unlock_read(map);
 1624                         return (KERN_INVALID_ADDRESS);
 1625                 }
 1626         }
 1627 
 1628         if (invalidate)
 1629                 pmap_remove(vm_map_pmap(map), start, end);
 1630         /*
 1631          * Make a second pass, cleaning/uncaching pages from the indicated
 1632          * objects as we go.
 1633          */
 1634         for (current = entry; current->start < end; current = current->next) {
 1635                 offset = current->offset + (start - current->start);
 1636                 size = (end <= current->end ? end : current->end) - start;
 1637                 if (current->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) {
 1638                         vm_map_t smap;
 1639                         vm_map_entry_t tentry;
 1640                         vm_size_t tsize;
 1641 
 1642                         smap = current->object.share_map;
 1643                         vm_map_lock_read(smap);
 1644                         (void) vm_map_lookup_entry(smap, offset, &tentry);
 1645                         tsize = tentry->end - offset;
 1646                         if (tsize < size)
 1647                                 size = tsize;
 1648                         object = tentry->object.vm_object;
 1649                         offset = tentry->offset + (offset - tentry->start);
 1650                         vm_map_unlock_read(smap);
 1651                 } else {
 1652                         object = current->object.vm_object;
 1653                 }
 1654                 /*
 1655                  * Note that there is absolutely no sense in writing out
 1656                  * anonymous objects, so we track down the vnode object
 1657                  * to write out.
 1658                  * We invalidate (remove) all pages from the address space
 1659                  * anyway, for semantic correctness.
 1660                  */
 1661                 while (object->backing_object) {
 1662                         object = object->backing_object;
 1663                         offset += object->backing_object_offset;
 1664                         if (object->size < OFF_TO_IDX( offset + size))
 1665                                 size = IDX_TO_OFF(object->size) - offset;
 1666                 }
 1667                 if (object && (object->type == OBJT_VNODE) && 
 1668                     (current->protection & VM_PROT_WRITE)) {
 1669                         /*
 1670                          * Flush pages if writing is allowed, invalidate them
 1671                          * if invalidation requested.  Pages undergoing I/O
 1672                          * will be ignored by vm_object_page_remove().
 1673                          *
 1674                          * We cannot lock the vnode and then wait for paging
 1675                          * to complete without deadlocking against vm_fault.
 1676                          * Instead we simply call vm_object_page_remove() and
 1677                          * allow it to block internally on a page-by-page 
 1678                          * basis when it encounters pages undergoing async 
 1679                          * I/O.
 1680                          */
 1681                         int flags;
 1682 
 1683                         vm_object_reference(object);
 1684                         vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
 1685                         flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
 1686                         flags |= invalidate ? OBJPC_INVAL : 0;
 1687                         vm_object_page_clean(object,
 1688                             OFF_TO_IDX(offset),
 1689                             OFF_TO_IDX(offset + size + PAGE_MASK),
 1690                             flags);
 1691                         if (invalidate) {
 1692                                 /*vm_object_pip_wait(object, "objmcl");*/
 1693                                 vm_object_page_remove(object,
 1694                                     OFF_TO_IDX(offset),
 1695                                     OFF_TO_IDX(offset + size + PAGE_MASK),
 1696                                     FALSE);
 1697                         }
 1698                         VOP_UNLOCK(object->handle, 0, curproc);
 1699                         vm_object_deallocate(object);
 1700                 }
 1701                 start += size;
 1702         }
 1703 
 1704         vm_map_unlock_read(map);
 1705         return (KERN_SUCCESS);
 1706 }
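
Editorial illustration: the syncio and invalidate arguments of vm_map_clean() appear to correspond to the MS_SYNC and MS_INVALIDATE options of msync(2). The standalone sketch below dirties one page of a file mapping and pushes it back; the file path and size are the example's own choices.

/*
 * Hedged userland sketch: dirty a file-backed page, then clean and
 * invalidate it with msync(2).
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>
#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
        const char *path = "/tmp/clean-demo";   /* example path */
        size_t len = (size_t)getpagesize();
        char *p;
        int fd;

        fd = open(path, O_RDWR | O_CREAT, 0600);
        if (fd == -1)
                err(1, "open");
        if (ftruncate(fd, (off_t)len) == -1)
                err(1, "ftruncate");

        p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
                err(1, "mmap");

        p[0] = 'x';                             /* dirty the page */

        /* Synchronous writeback; MS_INVALIDATE also drops the cached pages. */
        if (msync(p, len, MS_SYNC | MS_INVALIDATE) == -1)
                err(1, "msync");

        munmap(p, len);
        close(fd);
        return (0);
}
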
 1707 
 1708 /*
 1709  *      vm_map_entry_unwire:    [ internal use only ]
 1710  *
 1711  *      Make the region specified by this entry pageable.
 1712  *
 1713  *      The map in question should be locked.
 1714  *      [This is the reason for this routine's existence.]
 1715  */
 1716 static void 
 1717 vm_map_entry_unwire(map, entry)
 1718         vm_map_t map;
 1719         vm_map_entry_t entry;
 1720 {
 1721         vm_fault_unwire(map, entry->start, entry->end);
 1722         entry->wired_count = 0;
 1723 }
 1724 
 1725 /*
 1726  *      vm_map_entry_delete:    [ internal use only ]
 1727  *
 1728  *      Deallocate the given entry from the target map.
 1729  */
 1730 static void
 1731 vm_map_entry_delete(map, entry)
 1732         vm_map_t map;
 1733         vm_map_entry_t entry;
 1734 {
 1735         vm_map_entry_unlink(map, entry);
 1736         map->size -= entry->end - entry->start;
 1737 
 1738         if ((entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) == 0) {
 1739                 vm_object_deallocate(entry->object.vm_object);
 1740         }
 1741 
 1742         vm_map_entry_dispose(map, entry);
 1743 }
 1744 
 1745 /*
 1746  *      vm_map_delete:  [ internal use only ]
 1747  *
 1748  *      Deallocates the given address range from the target
 1749  *      map.
 1750  *
 1751  *      When called with a sharing map, removes pages from
 1752  *      that region from all physical maps.
 1753  */
 1754 int
 1755 vm_map_delete(map, start, end)
 1756         vm_map_t map;
 1757         vm_offset_t start;
 1758         vm_offset_t end;
 1759 {
 1760         vm_object_t object;
 1761         vm_map_entry_t entry;
 1762         vm_map_entry_t first_entry;
 1763 
 1764         /*
 1765          * Find the start of the region, and clip it
 1766          */
 1767 
 1768         if (!vm_map_lookup_entry(map, start, &first_entry))
 1769                 entry = first_entry->next;
 1770         else {
 1771                 entry = first_entry;
 1772                 vm_map_clip_start(map, entry, start);
 1773                 /*
 1774                  * Fix the lookup hint now, rather than each time through the
 1775                  * loop.
 1776                  */
 1777                 SAVE_HINT(map, entry->prev);
 1778         }
 1779 
 1780         /*
 1781          * Save the free space hint
 1782          */
 1783 
 1784         if (entry == &map->header) {
 1785                 map->first_free = &map->header;
 1786         } else if (map->first_free->start >= start) {
 1787                 map->first_free = entry->prev;
 1788         }
 1789 
 1790         /*
 1791          * Step through all entries in this region
 1792          */
 1793 
 1794         while ((entry != &map->header) && (entry->start < end)) {
 1795                 vm_map_entry_t next;
 1796                 vm_offset_t s, e;
 1797                 vm_pindex_t offidxstart, offidxend, count;
 1798 
 1799                 vm_map_clip_end(map, entry, end);
 1800 
 1801                 s = entry->start;
 1802                 e = entry->end;
 1803                 next = entry->next;
 1804 
 1805                 offidxstart = OFF_TO_IDX(entry->offset);
 1806                 count = OFF_TO_IDX(e - s);
 1807                 object = entry->object.vm_object;
 1808 
 1809                 /*
 1810                  * Unwire before removing addresses from the pmap; otherwise,
 1811                  * unwiring will put the entries back in the pmap.
 1812                  */
 1813                 if (entry->wired_count != 0) {
 1814                         vm_map_entry_unwire(map, entry);
 1815                 }
 1816 
 1817                 offidxend = offidxstart + count;
 1818                 /*
 1819                  * If this is a sharing map, we must remove *all* references
 1820                  * to this data, since we can't find all of the physical maps
 1821                  * which are sharing it.
 1822                  */
 1823 
 1824                 if ((object == kernel_object) || (object == kmem_object)) {
 1825                         vm_object_page_remove(object, offidxstart, offidxend, FALSE);
 1826                 } else if (!map->is_main_map) {
 1827                         vm_object_pmap_remove(object, offidxstart, offidxend);
 1828                 } else {
 1829                         pmap_remove(map->pmap, s, e);
 1830                         if (object != NULL &&
 1831                             object->ref_count != 1 &&
 1832                             (object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING &&
 1833                             (object->type == OBJT_SWAP || object->type == OBJT_DEFAULT)) {
 1834                                 vm_object_collapse(object);
 1835                                 vm_object_page_remove(object, offidxstart, offidxend, FALSE);
 1836                                 if (object->type == OBJT_SWAP) {
 1837                                         swap_pager_freespace(object, offidxstart, count);
 1838                                 }
 1839 
 1840                                 if ((offidxend >= object->size) &&
 1841                                         (offidxstart < object->size)) {
 1842                                                 object->size = offidxstart;
 1843                                 }
 1844                         }
 1845                 }
 1846 
 1847                 /*
 1848                  * Delete the entry (which may delete the object) only after
 1849                  * removing all pmap entries pointing to its pages.
 1850                  * (Otherwise, its page frames may be reallocated, and any
 1851                  * modify bits will be set in the wrong object!)
 1852                  */
 1853                 vm_map_entry_delete(map, entry);
 1854                 entry = next;
 1855         }
 1856         return (KERN_SUCCESS);
 1857 }
 1858 
 1859 /*
 1860  *      vm_map_remove:
 1861  *
 1862  *      Remove the given address range from the target map.
 1863  *      This is the exported form of vm_map_delete.
 1864  */
 1865 int
 1866 vm_map_remove(map, start, end)
 1867         vm_map_t map;
 1868         vm_offset_t start;
 1869         vm_offset_t end;
 1870 {
 1871         int result, s = 0;
 1872 
 1873         if (map == kmem_map || map == mb_map)
 1874                 s = splvm();
 1875 
 1876         vm_map_lock(map);
 1877         VM_MAP_RANGE_CHECK(map, start, end);
 1878         result = vm_map_delete(map, start, end);
 1879         vm_map_unlock(map);
 1880 
 1881         if (map == kmem_map || map == mb_map)
 1882                 splx(s);
 1883 
 1884         return (result);
 1885 }
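
Editorial illustration: vm_map_remove() is the exported deletion path, and munmap(2) is the usual userland route into it. A minimal sketch, with an arbitrary mapping size:

/*
 * Hedged userland sketch: create and then remove an anonymous mapping.
 */
#include <sys/mman.h>
#include <err.h>
#include <unistd.h>

int
main(void)
{
        size_t len = 16 * (size_t)getpagesize();        /* arbitrary size */
        void *p;

        p = mmap(NULL, len, PROT_READ | PROT_WRITE,
            MAP_ANON | MAP_PRIVATE, -1, 0);
        if (p == MAP_FAILED)
                err(1, "mmap");

        /* Removing the whole range; a partial unmap would clip entries first. */
        if (munmap(p, len) == -1)
                err(1, "munmap");
        return (0);
}
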
 1886 
 1887 /*
 1888  *      vm_map_check_protection:
 1889  *
 1890  *      Assert that the target map allows the specified
 1891  *      privilege on the entire address region given.
 1892  *      The entire region must be allocated.
 1893  */
 1894 boolean_t
 1895 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
 1896                         vm_prot_t protection)
 1897 {
 1898         vm_map_entry_t entry;
 1899         vm_map_entry_t tmp_entry;
 1900 
 1901         if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
 1902                 return (FALSE);
 1903         }
 1904         entry = tmp_entry;
 1905 
 1906         while (start < end) {
 1907                 if (entry == &map->header) {
 1908                         return (FALSE);
 1909                 }
 1910                 /*
 1911                  * No holes allowed!
 1912                  */
 1913 
 1914                 if (start < entry->start) {
 1915                         return (FALSE);
 1916                 }
 1917                 /*
 1918                  * Check protection associated with entry.
 1919                  */
 1920 
 1921                 if ((entry->protection & protection) != protection) {
 1922                         return (FALSE);
 1923                 }
 1924                 /* go to next entry */
 1925 
 1926                 start = entry->end;
 1927                 entry = entry->next;
 1928         }
 1929         return (TRUE);
 1930 }
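
Editorial illustration: a hypothetical caller of vm_map_check_protection() as prototyped above, rejecting an operation unless the whole range is both readable and writable. The names map, uva and size are invented for the example.

        /*
         * Hypothetical caller sketch (not from vm_map.c).
         */
        if (!vm_map_check_protection(map, uva, uva + size,
            VM_PROT_READ | VM_PROT_WRITE))
                return (KERN_PROTECTION_FAILURE);
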
 1931 
 1932 /*
 1933  * Split the pages in a map entry into a new object.  This affords
 1934  * easier removal of unused pages, and keeps object inheritance from
 1935  * having a negative impact on memory usage.
 1936  */
 1937 static void
 1938 vm_map_split(entry)
 1939         vm_map_entry_t entry;
 1940 {
 1941         vm_page_t m;
 1942         vm_object_t orig_object, new_object, source;
 1943         vm_offset_t s, e;
 1944         vm_pindex_t offidxstart, offidxend, idx;
 1945         vm_size_t size;
 1946         vm_ooffset_t offset;
 1947 
 1948         orig_object = entry->object.vm_object;
 1949         if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
 1950                 return;
 1951         if (orig_object->ref_count <= 1)
 1952                 return;
 1953 
 1954         offset = entry->offset;
 1955         s = entry->start;
 1956         e = entry->end;
 1957 
 1958         offidxstart = OFF_TO_IDX(offset);
 1959         offidxend = offidxstart + OFF_TO_IDX(e - s);
 1960         size = offidxend - offidxstart;
 1961 
 1962         new_object = vm_pager_allocate(orig_object->type,
 1963                 NULL, IDX_TO_OFF(size), VM_PROT_ALL, 0LL);
 1964         if (new_object == NULL)
 1965                 return;
 1966 
 1967         source = orig_object->backing_object;
 1968         if (source != NULL) {
 1969                 vm_object_reference(source);    /* Referenced by new_object */
 1970                 TAILQ_INSERT_TAIL(&source->shadow_head,
 1971                                   new_object, shadow_list);
 1972                 vm_object_clear_flag(source, OBJ_ONEMAPPING);
 1973                 new_object->backing_object_offset = 
 1974                         orig_object->backing_object_offset + IDX_TO_OFF(offidxstart);
 1975                 new_object->backing_object = source;
 1976                 source->shadow_count++;
 1977                 source->generation++;
 1978         }
 1979 
 1980         for (idx = 0; idx < size; idx++) {
 1981                 vm_page_t m;
 1982 
 1983         retry:
 1984                 m = vm_page_lookup(orig_object, offidxstart + idx);
 1985                 if (m == NULL)
 1986                         continue;
 1987                 if (m->flags & PG_BUSY) {
 1988                         int s = splvm();
 1989                         if (m->flags & PG_BUSY) {
 1990                                 vm_page_flag_set(m, PG_WANTED);
 1991                                 tsleep(m, PVM, "spltwt", 0);
 1992                         }
 1993                         splx(s);
 1994                         goto retry;
 1995                 }
 1996                         
 1997                 vm_page_busy(m);
 1998                 vm_page_protect(m, VM_PROT_NONE);
 1999                 vm_page_rename(m, new_object, idx);
 2000                 /*
 2001                  * Cannot leave dirty page in PQ_CACHE,
 2002                  * deactivate it if necessary.
 2003                  */
 2004                 if (m->queue - m->pc == PQ_CACHE)
 2005                         vm_page_deactivate(m);
 2006                 m->dirty = VM_PAGE_BITS_ALL;
 2007                 vm_page_busy(m);
 2008         }
 2009 
 2010         if (orig_object->type == OBJT_SWAP) {
 2011                 vm_object_pip_add(orig_object, 1);
 2012                 /*
 2013                  * copy orig_object pages into new_object
 2014                  * and destroy unneeded pages in
 2015                  * shadow object.
 2016                  */
 2017                 swap_pager_copy(orig_object, OFF_TO_IDX(orig_object->paging_offset),
 2018                     new_object, OFF_TO_IDX(new_object->paging_offset),
 2019                         offidxstart, 0);
 2020                 vm_object_pip_wakeup(orig_object);
 2021         }
 2022 
 2023         for (idx = 0; idx < size; idx++) {
 2024                 m = vm_page_lookup(new_object, idx);
 2025                 if (m) {
 2026                         vm_page_wakeup(m);
 2027                 }
 2028         }
 2029 
 2030         entry->object.vm_object = new_object;
 2031         entry->offset = 0LL;
 2032         vm_object_deallocate(orig_object);
 2033 }
 2034 
 2035 /*
 2036  *      vm_map_copy_entry:
 2037  *
 2038  *      Copies the contents of the source entry to the destination
 2039  *      entry.  The entries *must* be aligned properly.
 2040  */
 2041 static void
 2042 vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
 2043         vm_map_t src_map, dst_map;
 2044         vm_map_entry_t src_entry, dst_entry;
 2045 {
 2046         vm_object_t src_object;
 2047 
 2048         if ((dst_entry->eflags|src_entry->eflags) &
 2049                 (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP))
 2050                 return;
 2051 
 2052         if (src_entry->wired_count == 0) {
 2053 
 2054                 /*
 2055                  * If the source entry is marked needs_copy, it is already
 2056                  * write-protected.
 2057                  */
 2058                 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
 2059                         pmap_protect(src_map->pmap,
 2060                             src_entry->start,
 2061                             src_entry->end,
 2062                             src_entry->protection & ~VM_PROT_WRITE);
 2063                 }
 2064 
 2065                 /*
 2066                  * Make a copy of the object.
 2067                  */
 2068                 if ((src_object = src_entry->object.vm_object) != NULL) {
 2069 
 2070                         if ((src_object->handle == NULL) &&
 2071                                 (src_object->type == OBJT_DEFAULT ||
 2072                                  src_object->type == OBJT_SWAP)) {
 2073                                 vm_object_collapse(src_object);
 2074                                 if ((src_object->flags & (OBJ_NOSPLIT|OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) {
 2075                                         vm_map_split(src_entry);
 2076                                         src_map->timestamp++;
 2077                                         src_object = src_entry->object.vm_object;
 2078                                 }
 2079                         }
 2080 
 2081                         vm_object_reference(src_object);
 2082                         vm_object_clear_flag(src_object, OBJ_ONEMAPPING);
 2083                         dst_entry->object.vm_object = src_object;
 2084                         src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
 2085                         dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
 2086                         dst_entry->offset = src_entry->offset;
 2087                 } else {
 2088                         dst_entry->object.vm_object = NULL;
 2089                         dst_entry->offset = 0;
 2090                 }
 2091 
 2092                 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
 2093                     dst_entry->end - dst_entry->start, src_entry->start);
 2094         } else {
 2095                 /*
 2096                  * Of course, wired-down pages can't be set copy-on-write.
 2097                  * Cause wired pages to be copied into the new map by
 2098                  * simulating faults (the new pages are pageable).
 2099                  */
 2100                 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
 2101         }
 2102 }
 2103 
 2104 /*
 2105  * vmspace_fork:
 2106  * Create a new process vmspace structure and vm_map
 2107  * based on those of an existing process.  The new map
 2108  * is based on the old map, according to the inheritance
 2109  * values on the regions in that map.
 2110  *
 2111  * The source map must not be locked.
 2112  */
 2113 struct vmspace *
 2114 vmspace_fork(vm1)
 2115         struct vmspace *vm1;
 2116 {
 2117         struct vmspace *vm2;
 2118         vm_map_t old_map = &vm1->vm_map;
 2119         vm_map_t new_map;
 2120         vm_map_entry_t old_entry;
 2121         vm_map_entry_t new_entry;
 2122         vm_object_t object;
 2123 
 2124         vm_map_lock(old_map);
 2125 
 2126         vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset);
 2127         bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
 2128             (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
 2129         new_map = &vm2->vm_map; /* XXX */
 2130         new_map->timestamp = 1;
 2131 
 2132         old_entry = old_map->header.next;
 2133 
 2134         while (old_entry != &old_map->header) {
 2135                 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 2136                         panic("vm_map_fork: encountered a submap");
 2137 
 2138                 switch (old_entry->inheritance) {
 2139                 case VM_INHERIT_NONE:
 2140                         break;
 2141 
 2142                 case VM_INHERIT_SHARE:
 2143                         /*
 2144                          * Clone the entry, creating the shared object if necessary.
 2145                          */
 2146                         object = old_entry->object.vm_object;
 2147                         if (object == NULL) {
 2148                                 object = vm_object_allocate(OBJT_DEFAULT,
 2149                                         atop(old_entry->end - old_entry->start));
 2150                                 old_entry->object.vm_object = object;
 2151                                 old_entry->offset = (vm_offset_t) 0;
 2152                         } else if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
 2153                                 vm_object_shadow(&old_entry->object.vm_object,
 2154                                         &old_entry->offset,
 2155                                         atop(old_entry->end - old_entry->start));
 2156                                 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 2157                                 object = old_entry->object.vm_object;
 2158                         }
 2159                         vm_object_clear_flag(object, OBJ_ONEMAPPING);
 2160 
 2161                         /*
 2162                          * Clone the entry, referencing the sharing map.
 2163                          */
 2164                         new_entry = vm_map_entry_create(new_map);
 2165                         *new_entry = *old_entry;
 2166                         new_entry->wired_count = 0;
 2167                         vm_object_reference(object);
 2168 
 2169                         /*
 2170                          * Insert the entry into the new map -- we know we're
 2171                          * inserting at the end of the new map.
 2172                          */
 2173 
 2174                         vm_map_entry_link(new_map, new_map->header.prev,
 2175                             new_entry);
 2176 
 2177                         /*
 2178                          * Update the physical map
 2179                          */
 2180 
 2181                         pmap_copy(new_map->pmap, old_map->pmap,
 2182                             new_entry->start,
 2183                             (old_entry->end - old_entry->start),
 2184                             old_entry->start);
 2185                         break;
 2186 
 2187                 case VM_INHERIT_COPY:
 2188                         /*
 2189                          * Clone the entry and link into the map.
 2190                          */
 2191                         new_entry = vm_map_entry_create(new_map);
 2192                         *new_entry = *old_entry;
 2193                         new_entry->wired_count = 0;
 2194                         new_entry->object.vm_object = NULL;
 2195                         new_entry->eflags &= ~MAP_ENTRY_IS_A_MAP;
 2196                         vm_map_entry_link(new_map, new_map->header.prev,
 2197                             new_entry);
 2198                         vm_map_copy_entry(old_map, new_map, old_entry,
 2199                             new_entry);
 2200                         break;
 2201                 }
 2202                 old_entry = old_entry->next;
 2203         }
 2204 
 2205         new_map->size = old_map->size;
 2206         vm_map_unlock(old_map);
 2207         old_map->timestamp++;
 2208 
 2209         return (vm2);
 2210 }
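
Editorial illustration: the VM_INHERIT_COPY case above is what gives fork(2) its copy-on-write behaviour. The standalone sketch below shows that a child's write does not disturb the parent's copy; the buffer size and strings are arbitrary.

/*
 * Hedged userland sketch: demonstrate copy-on-write across fork(2).
 */
#include <sys/types.h>
#include <sys/wait.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
        char *buf = malloc(4096);
        pid_t pid;
        int status;

        if (buf == NULL)
                err(1, "malloc");
        strcpy(buf, "parent");

        pid = fork();
        if (pid == -1)
                err(1, "fork");
        if (pid == 0) {
                /* The child's write faults and copies the shared page. */
                strcpy(buf, "child");
                _exit(0);
        }
        waitpid(pid, &status, 0);
        /* The parent's copy is untouched by the child's write. */
        printf("parent still sees: %s\n", buf);
        return (0);
}
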
 2211 
 2212 int
 2213 vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
 2214               vm_prot_t prot, vm_prot_t max, int cow)
 2215 {
 2216         vm_map_entry_t prev_entry;
 2217         vm_map_entry_t new_stack_entry;
 2218         vm_size_t      init_ssize;
 2219         int            rv;
 2220 
 2221         if (VM_MIN_ADDRESS > 0 && addrbos < VM_MIN_ADDRESS)
 2222                 return (KERN_NO_SPACE);
 2223 
 2224         if (max_ssize < SGROWSIZ)
 2225                 init_ssize = max_ssize;
 2226         else
 2227                 init_ssize = SGROWSIZ;
 2228 
 2229         vm_map_lock(map);
 2230 
 2231         /* If addr is already mapped, no go */
 2232         if (vm_map_lookup_entry(map, addrbos, &prev_entry)) {
 2233                 vm_map_unlock(map);
 2234                 return (KERN_NO_SPACE);
 2235         }
 2236 
 2237         /* If we can't accommodate max_ssize in the current mapping,
 2238          * no go.  However, we need to be aware that subsequent user
 2239          * mappings might map into the space we have reserved for the
 2240          * stack, and currently this space is not protected.
 2241          *
 2242          * Hopefully we will at least detect this condition
 2243          * when we try to grow the stack.
 2244          */
 2245         if ((prev_entry->next != &map->header) &&
 2246             (prev_entry->next->start < addrbos + max_ssize)) {
 2247                 vm_map_unlock(map);
 2248                 return (KERN_NO_SPACE);
 2249         }
 2250 
 2251         /* We initially map a stack of only init_ssize.  We will
 2252          * grow as needed later.  Since this is to be a grow-down
 2253          * stack, we map at the top of the range.
 2254          *
 2255          * Note: we would normally expect prot and max to be
 2256          * VM_PROT_ALL, and cow to be 0.  Possibly we should
 2257          * eliminate these as input parameters, and just
 2258          * pass these values here in the insert call.
 2259          */
 2260         rv = vm_map_insert(map, NULL, 0, addrbos + max_ssize - init_ssize,
 2261                            addrbos + max_ssize, prot, max, cow);
 2262 
 2263         /* Now set the avail_ssize amount */
 2264         if (rv == KERN_SUCCESS){
 2265                 if (prev_entry != &map->header)
 2266                         vm_map_clip_end(map, prev_entry, addrbos + max_ssize - init_ssize);
 2267                 new_stack_entry = prev_entry->next;
 2268                 if (new_stack_entry->end   != addrbos + max_ssize ||
 2269                     new_stack_entry->start != addrbos + max_ssize - init_ssize)
 2270                         panic ("Bad entry start/end for new stack entry");
 2271                 else 
 2272                         new_stack_entry->avail_ssize = max_ssize - init_ssize;
 2273         }
 2274 
 2275         vm_map_unlock(map);
 2276         return (rv);
 2277 }
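
Editorial illustration: on FreeBSD releases that provide it, the MAP_STACK flag to mmap(2) is the documented way to create a grow-down region of the kind built by vm_map_stack(). Exact flag requirements vary by release, so this is a hedged sketch with arbitrary sizes; how far the region is initially mapped depends on the kernel's SGROWSIZ.

/*
 * Hedged userland sketch: reserve a grow-down stack region with MAP_STACK.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>
#include <unistd.h>

int
main(void)
{
        size_t max_ssize = 1024 * 1024;         /* example: reserve 1 MB */
        char *region, *top, *deep;

        region = mmap(NULL, max_ssize, PROT_READ | PROT_WRITE,
            MAP_STACK | MAP_ANON, -1, 0);
        if (region == MAP_FAILED)
                err(1, "mmap(MAP_STACK)");

        top = region + max_ssize;
        top[-1] = 1;            /* within the initially mapped top portion */

        /*
         * Touching well below the initially mapped portion is expected to be
         * satisfied by the downward-grow path on kernels that auto-grow
         * MAP_STACK regions.
         */
        deep = top - (max_ssize / 2);
        *deep = 2;

        munmap(region, max_ssize);
        return (0);
}
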
 2278 
 2279 /* Attempts to grow a vm stack entry.  Returns KERN_SUCCESS if the
 2280  * desired address is already mapped, or if we successfully grow
 2281  * the stack.  Also returns KERN_SUCCESS if addr is outside the
 2282  * stack range (this is strange, but preserves compatibility with
 2283  * the grow function in vm_machdep.c).
 2284  */
 2285 int
 2286 vm_map_growstack (struct proc *p, vm_offset_t addr)
 2287 {
 2288         vm_map_entry_t prev_entry;
 2289         vm_map_entry_t stack_entry;
 2290         vm_map_entry_t new_stack_entry;
 2291         struct vmspace *vm = p->p_vmspace;
 2292         vm_map_t map = &vm->vm_map;
 2293         vm_offset_t    end;
 2294         int      grow_amount;
 2295         int      rv;
 2296         int      is_procstack;
 2297 Retry:
 2298         vm_map_lock_read(map);
 2299 
 2300         /* If addr is already in the entry range, no need to grow. */
 2301         if (vm_map_lookup_entry(map, addr, &prev_entry)) {
 2302                 vm_map_unlock_read(map);
 2303                 return (KERN_SUCCESS);
 2304         }
 2305 
 2306         if ((stack_entry = prev_entry->next) == &map->header) {
 2307                 vm_map_unlock_read(map);
 2308                 return (KERN_SUCCESS);
 2309         } 
 2310         if (prev_entry == &map->header) 
 2311                 end = stack_entry->start - stack_entry->avail_ssize;
 2312         else
 2313                 end = prev_entry->end;
 2314 
 2315         /* This next test mimics the old grow function in vm_machdep.c.
 2316          * It really doesn't quite make sense, but we do it anyway
 2317          * for compatibility.
 2318          *
 2319          * If the stack is not growable, return success.  This signals the
 2320          * caller to proceed as it normally would with ordinary VM.
 2321          */
 2322         if (stack_entry->avail_ssize < 1 ||
 2323             addr >= stack_entry->start ||
 2324             addr <  stack_entry->start - stack_entry->avail_ssize) {
 2325                 vm_map_unlock_read(map);
 2326                 return (KERN_SUCCESS);
 2327         } 
 2328         
 2329         /* Find the minimum grow amount */
 2330         grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
 2331         if (grow_amount > stack_entry->avail_ssize) {
 2332                 vm_map_unlock_read(map);
 2333                 return (KERN_NO_SPACE);
 2334         }
 2335 
 2336         /* If there is no longer enough space between the entries,
 2337          * no go; adjust the available space.  Note: this
 2338          * should only happen if the user has mapped into the
 2339          * stack area after the stack was created, and is
 2340          * probably an error.
 2341          *
 2342          * This also effectively destroys any guard page the user
 2343          * might have intended by limiting the stack size.
 2344          */
 2345         if (grow_amount > stack_entry->start - end) {
 2346                 if (vm_map_lock_upgrade(map))
 2347                         goto Retry;
 2348 
 2349                 stack_entry->avail_ssize = stack_entry->start - end;
 2350 
 2351                 vm_map_unlock(map);
 2352                 return (KERN_NO_SPACE);
 2353         }
 2354 
 2355         is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
 2356 
 2357         /* If this is the main process stack, see if we're over the 
 2358          * stack limit.
 2359          */
 2360         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
 2361                              p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
 2362                 vm_map_unlock_read(map);
 2363                 return (KERN_NO_SPACE);
 2364         }
 2365 
 2366         /* Round up the grow amount to a multiple of SGROWSIZ */
 2367         grow_amount = roundup (grow_amount, SGROWSIZ);
 2368         if (grow_amount > stack_entry->avail_ssize) {
 2369                 grow_amount = stack_entry->avail_ssize;
 2370         }
 2371         if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
 2372                              p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
 2373                 grow_amount = p->p_rlimit[RLIMIT_STACK].rlim_cur -
 2374                               ctob(vm->vm_ssize);
 2375         }
 2376 
 2377         if (vm_map_lock_upgrade(map))
 2378                 goto Retry;
 2379 
 2380         /* Get the preliminary new entry start value */
 2381         addr = stack_entry->start - grow_amount;
 2382 
 2383         /* If this puts us into the previous entry, cut back our growth
 2384          * to the available space.  Also, see the note above.
 2385          */
 2386         if (addr < end) {
 2387                 stack_entry->avail_ssize = stack_entry->start - end;
 2388                 addr = end;
 2389         }
 2390 
 2391         rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
 2392                            VM_PROT_ALL,
 2393                            VM_PROT_ALL,
 2394                            0);
 2395 
 2396         /* Adjust the available stack space by the amount we grew. */
 2397         if (rv == KERN_SUCCESS) {
 2398                 if (prev_entry != &map->header)
 2399                         vm_map_clip_end(map, prev_entry, addr);
 2400                 new_stack_entry = prev_entry->next;
 2401                 if (new_stack_entry->end   != stack_entry->start  ||
 2402                     new_stack_entry->start != addr)
 2403                         panic ("Bad stack grow start/end in new stack entry");
 2404                 else {
 2405                         new_stack_entry->avail_ssize = stack_entry->avail_ssize -
 2406                                                         (new_stack_entry->end -
 2407                                                          new_stack_entry->start);
 2408                         if (is_procstack)
 2409                                 vm->vm_ssize += btoc(new_stack_entry->end -
 2410                                                      new_stack_entry->start);
 2411                 }
 2412         }
 2413 
 2414         vm_map_unlock(map);
 2415         return (rv);
 2416 
 2417 }
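
Editorial illustration: the growth arithmetic above rounds the fault distance up to a page, batches it up to SGROWSIZ, then clamps it to the remaining reservation. The standalone sketch below redoes that arithmetic with invented values for the constants and addresses, and omits the RLIMIT_STACK clamp.

/*
 * Hedged arithmetic sketch: follow the grow-amount computation with
 * concrete (invented) numbers.
 */
#include <stdio.h>

/* Illustrative constants; real values come from the kernel configuration. */
#define PAGE_SIZE       4096UL
#define SGROWSIZ        (128UL * 1024)

#define ROUNDUP(x, y)   ((((x) + ((y) - 1)) / (y)) * (y))

int
main(void)
{
        unsigned long stack_start = 0xbfc00000UL;       /* hypothetical entry start */
        unsigned long addr        = 0xbfbff123UL;       /* hypothetical fault address */
        unsigned long avail_ssize = 4UL * 1024 * 1024;  /* remaining reservation */
        unsigned long grow;

        grow = ROUNDUP(stack_start - addr, PAGE_SIZE);  /* minimum growth: one page here */
        printf("minimum grow: %lu bytes\n", grow);

        grow = ROUNDUP(grow, SGROWSIZ);                 /* batch growth to SGROWSIZ */
        if (grow > avail_ssize)
                grow = avail_ssize;                     /* clamp to the reservation */
        printf("actual grow:  %lu bytes\n", grow);
        printf("new start:    %#lx\n", stack_start - grow);
        return (0);
}
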
 2418 
 2419 /*
 2420  * Unshare the specified VM space for exec.  If other processes are
 2421  * sharing it, create a new one.  The new vmspace has no user mappings.
 2422  */
 2423 
 2424 void
 2425 vmspace_exec(struct proc *p) {
 2426         struct vmspace *oldvmspace = p->p_vmspace;
 2427         struct vmspace *newvmspace;
 2428         vm_map_t map = &p->p_vmspace->vm_map;
 2429 
 2430         newvmspace = vmspace_alloc(map->min_offset, map->max_offset);
 2431         bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
 2432             (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
 2433         /*
 2434          * This code is written like this for prototype purposes.  The
 2435          * goal is to avoid running down the vmspace here, but to let the
 2436          * other processes that are still using the vmspace finally
 2437          * run it down.  Even though there is little or no chance of blocking
 2438          * here, it is a good idea to keep this form for future mods.
 2439          */
 2440         vmspace_free(oldvmspace);
 2441         p->p_vmspace = newvmspace;
 2442         pmap_pinit2(vmspace_pmap(newvmspace));
 2443         if (p == curproc)
 2444                 pmap_activate(p);
 2445 }
 2446 
 2447 /*
 2448  * Unshare the specified VM space for forcing COW.  This
 2449  * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
 2450  */
 2451 
 2452 void
 2453 vmspace_unshare(struct proc *p) {
 2454         struct vmspace *oldvmspace = p->p_vmspace;
 2455         struct vmspace *newvmspace;
 2456 
 2457         if (oldvmspace->vm_refcnt == 1)
 2458                 return;
 2459         newvmspace = vmspace_fork(oldvmspace);
 2460         vmspace_free(oldvmspace);
 2461         p->p_vmspace = newvmspace;
 2462         pmap_pinit2(vmspace_pmap(newvmspace));
 2463         if (p == curproc)
 2464                 pmap_activate(p);
 2465 }
 2466         
 2467 
 2468 /*
 2469  *      vm_map_lookup:
 2470  *
 2471  *      Finds the VM object, offset, and
 2472  *      protection for a given virtual address in the
 2473  *      specified map, assuming a page fault of the
 2474  *      type specified.
 2475  *
 2476  *      Leaves the map in question locked for read; return
 2477  *      values are guaranteed until a vm_map_lookup_done
 2478  *      call is performed.  Note that the map argument
 2479  *      is in/out; the returned map must be used in
 2480  *      the call to vm_map_lookup_done.
 2481  *
 2482  *      A handle (out_entry) is returned for use in
 2483  *      vm_map_lookup_done, to make that fast.
 2484  *
 2485  *      If a lookup is requested with "write protection"
 2486  *      specified, the map may be changed to perform virtual
 2487  *      copying operations, although the data referenced will
 2488  *      remain the same.
 2489  */
 2490 int
 2491 vm_map_lookup(vm_map_t *var_map,                /* IN/OUT */
 2492               vm_offset_t vaddr,
 2493               vm_prot_t fault_typea,
 2494               vm_map_entry_t *out_entry,        /* OUT */
 2495               vm_object_t *object,              /* OUT */
 2496               vm_pindex_t *pindex,              /* OUT */
 2497               vm_prot_t *out_prot,              /* OUT */
 2498               boolean_t *wired)                 /* OUT */
 2499 {
 2500         vm_map_t share_map;
 2501         vm_offset_t share_offset;
 2502         vm_map_entry_t entry;
 2503         vm_map_t map = *var_map;
 2504         vm_prot_t prot;
 2505         boolean_t su;
 2506         vm_prot_t fault_type = fault_typea;
 2507 
 2508 RetryLookup:;
 2509 
 2510         /*
 2511          * Lookup the faulting address.
 2512          */
 2513 
 2514         vm_map_lock_read(map);
 2515 
 2516 #define RETURN(why) \
 2517                 { \
 2518                 vm_map_unlock_read(map); \
 2519                 return(why); \
 2520                 }
 2521 
 2522         /*
 2523          * If the map has an interesting hint, try it before calling the
 2524          * full-blown lookup routine.
 2525          */
 2526 
 2527         entry = map->hint;
 2528 
 2529         *out_entry = entry;
 2530 
 2531         if ((entry == &map->header) ||
 2532             (vaddr < entry->start) || (vaddr >= entry->end)) {
 2533                 vm_map_entry_t tmp_entry;
 2534 
 2535                 /*
 2536                  * Entry was either not a valid hint, or the vaddr was not
 2537                  * contained in the entry, so do a full lookup.
 2538                  */
 2539                 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
 2540                         RETURN(KERN_INVALID_ADDRESS);
 2541 
 2542                 entry = tmp_entry;
 2543                 *out_entry = entry;
 2544         }
 2545         
 2546         /*
 2547          * Handle submaps.
 2548          */
 2549 
 2550         if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
 2551                 vm_map_t old_map = map;
 2552 
 2553                 *var_map = map = entry->object.sub_map;
 2554                 vm_map_unlock_read(old_map);
 2555                 goto RetryLookup;
 2556         }
 2557 
 2558         /*
 2559          * Check whether this task is allowed to have this page.
 2560          * Note the special case for MAP_ENTRY_COW
 2561          * pages with an override.  This is to implement a forced
 2562          * COW for debuggers.
 2563          */
 2564 
 2565         if (fault_type & VM_PROT_OVERRIDE_WRITE)
 2566                 prot = entry->max_protection;
 2567         else
 2568                 prot = entry->protection;
 2569 
 2570         fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
 2571         if ((fault_type & prot) != fault_type) {
 2572                         RETURN(KERN_PROTECTION_FAILURE);
 2573         }
 2574 
 2575         if (entry->wired_count && (fault_type & VM_PROT_WRITE) &&
 2576                         (entry->eflags & MAP_ENTRY_COW) &&
 2577                         (fault_typea & VM_PROT_OVERRIDE_WRITE) == 0) {
 2578                         RETURN(KERN_PROTECTION_FAILURE);
 2579         }
 2580 
 2581         /*
 2582          * If this page is not pageable, we have to get it for all possible
 2583          * accesses.
 2584          */
 2585 
 2586         *wired = (entry->wired_count != 0);
 2587         if (*wired)
 2588                 prot = fault_type = entry->protection;
 2589 
 2590         /*
 2591          * If we don't already have a VM object, track it down.
 2592          */
 2593 
 2594         su = (entry->eflags & MAP_ENTRY_IS_A_MAP) == 0;
 2595         if (su) {
 2596                 share_map = map;
 2597                 share_offset = vaddr;
 2598         } else {
 2599                 vm_map_entry_t share_entry;
 2600 
 2601                 /*
 2602                  * Compute the sharing map, and offset into it.
 2603                  */
 2604 
 2605                 share_map = entry->object.share_map;
 2606                 share_offset = (vaddr - entry->start) + entry->offset;
 2607 
 2608                 /*
 2609                  * Look for the backing store object and offset
 2610                  */
 2611 
 2612                 vm_map_lock_read(share_map);
 2613 
 2614                 if (!vm_map_lookup_entry(share_map, share_offset,
 2615                         &share_entry)) {
 2616                         vm_map_unlock_read(share_map);
 2617                         RETURN(KERN_INVALID_ADDRESS);
 2618                 }
 2619                 entry = share_entry;
 2620         }
 2621 
 2622         /*
 2623          * If the entry was copy-on-write, we either ...
 2624          */
 2625 
 2626         if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
 2627                 /*
 2628                  * If we want to write the page, we may as well handle that
 2629                  * now since we've got the sharing map locked.
 2630                  *
 2631                  * If we don't need to write the page, we just demote the
 2632                  * permissions allowed.
 2633                  */
 2634 
 2635                 if (fault_type & VM_PROT_WRITE) {
 2636                         /*
 2637                          * Make a new object, and place it in the object
 2638                          * chain.  Note that no new references have appeared
 2639                          * -- one just moved from the share map to the new
 2640                          * object.
 2641                          */
 2642 
 2643                         if (vm_map_lock_upgrade(share_map)) {
 2644                                 if (share_map != map)
 2645                                         vm_map_unlock_read(map);
 2646 
 2647                                 goto RetryLookup;
 2648                         }
 2649                         vm_object_shadow(
 2650                             &entry->object.vm_object,
 2651                             &entry->offset,
 2652                             atop(entry->end - entry->start));
 2653 
 2654                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 2655                         vm_map_lock_downgrade(share_map);
 2656                 } else {
 2657                         /*
 2658                          * We're attempting to read a copy-on-write page --
 2659                          * don't allow writes.
 2660                          */
 2661 
 2662                         prot &= ~VM_PROT_WRITE;
 2663                 }
 2664         }
 2665 
 2666         /*
 2667          * Create an object if necessary.
 2668          */
 2669         if (entry->object.vm_object == NULL) {
 2670 
 2671                 if (vm_map_lock_upgrade(share_map)) {
 2672                         if (share_map != map)
 2673                                 vm_map_unlock_read(map);
 2674                         goto RetryLookup;
 2675                 }
 2676                 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
 2677                     atop(entry->end - entry->start));
 2678                 entry->offset = 0;
 2679                 vm_map_lock_downgrade(share_map);
 2680         }
 2681 
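        /*
         * Anonymous memory still using the default pager is converted to a
         * swap-pager-backed object before the lookup result is returned.
         */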
 2682         if (entry->object.vm_object->type == OBJT_DEFAULT)
 2683                 default_pager_convert_to_swapq(entry->object.vm_object);
 2684         /*
 2685          * Return the object/offset from this entry.  If the entry was
 2686          * copy-on-write or empty, it has been fixed up.
 2687          */
 2688 
 2689         *pindex = OFF_TO_IDX((share_offset - entry->start) + entry->offset);
 2690         *object = entry->object.vm_object;
 2691 
 2692         /*
 2693          * Return the protection computed for this lookup.
 2694          */
 2695 
 2696         *out_prot = prot;
 2697         return (KERN_SUCCESS);
 2698 
 2699 #undef  RETURN
 2700 }
 2701 
 2702 /*
 2703  *      vm_map_lookup_done:
 2704  *
 2705  *      Releases locks acquired by a vm_map_lookup
 2706  *      (according to the handle returned by that lookup).
 2707  */
 2708 
 2709 void
 2710 vm_map_lookup_done(map, entry)
 2711         vm_map_t map;
 2712         vm_map_entry_t entry;
 2713 {
 2714         /*
 2715          * If this entry references a map, unlock it first.
 2716          */
 2717 
 2718         if (entry->eflags & MAP_ENTRY_IS_A_MAP)
 2719                 vm_map_unlock_read(entry->object.share_map);
 2720 
 2721         /*
 2722          * Unlock the main-level map
 2723          */
 2724 
 2725         vm_map_unlock_read(map);
 2726 }
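
/*
 * Illustrative sketch only (not part of the original file): the canonical
 * pairing of vm_map_lookup() with vm_map_lookup_done(), as a fault handler
 * would use it.  The function and variable names below are hypothetical;
 * kept under #if 0, matching this file's convention for inactive code.
 */
#if 0
static int
vm_map_lookup_example(vm_map_t map, vm_offset_t va, vm_prot_t fault_type)
{
        vm_map_entry_t entry;
        vm_object_t object;
        vm_pindex_t pindex;
        vm_prot_t prot;
        boolean_t wired;
        int rv;

        rv = vm_map_lookup(&map, va, fault_type, &entry, &object,
            &pindex, &prot, &wired);
        if (rv != KERN_SUCCESS)
                return (rv);

        /* ... operate on (object, pindex) while the map stays read-locked ... */

        vm_map_lookup_done(map, entry);
        return (KERN_SUCCESS);
}
#endif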
 2727 
 2728 /*
 2729  * Implement uiomove with VM operations.  This, together with its collateral
 2730  * changes, supports every combination of source object modification and
 2731  * copy-on-write (COW) operation.
 2732  */
 2733 int
 2734 vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages)
 2735         vm_map_t mapa;
 2736         vm_object_t srcobject;
 2737         off_t cp;
 2738         int cnta;
 2739         vm_offset_t uaddra;
 2740         int *npages;
 2741 {
 2742         vm_map_t map;
 2743         vm_object_t first_object, oldobject, object;
 2744         vm_map_entry_t entry;
 2745         vm_prot_t prot;
 2746         boolean_t wired;
 2747         int tcnt, rv;
 2748         vm_offset_t uaddr, start, end, tend;
 2749         vm_pindex_t first_pindex, osize, oindex;
 2750         off_t ooffset;
 2751         int cnt;
 2752 
 2753         if (npages)
 2754                 *npages = 0;
 2755 
 2756         cnt = cnta;
 2757         uaddr = uaddra;
 2758 
 2759         while (cnt > 0) {
 2760                 map = mapa;
 2761 
 2762                 if ((vm_map_lookup(&map, uaddr,
 2763                         VM_PROT_READ, &entry, &first_object,
 2764                         &first_pindex, &prot, &wired)) != KERN_SUCCESS) {
 2765                         return EFAULT;
 2766                 }
 2767 
 2768                 vm_map_clip_start(map, entry, uaddr);
 2769 
 2770                 tcnt = cnt;
 2771                 tend = uaddr + tcnt;
 2772                 if (tend > entry->end) {
 2773                         tcnt = entry->end - uaddr;
 2774                         tend = entry->end;
 2775                 }
 2776 
 2777                 vm_map_clip_end(map, entry, tend);
 2778 
 2779                 start = entry->start;
 2780                 end = entry->end;
 2781 
 2782                 osize = atop(tcnt);
 2783 
 2784                 oindex = OFF_TO_IDX(cp);
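                /*
                 * When a page count was requested, take the optimized path
                 * only if every source page is already resident, not busy,
                 * and fully valid; otherwise give up without moving anything.
                 */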
 2785                 if (npages) {
 2786                         vm_pindex_t idx;
 2787                         for (idx = 0; idx < osize; idx++) {
 2788                                 vm_page_t m;
 2789                                 if ((m = vm_page_lookup(srcobject, oindex + idx)) == NULL) {
 2790                                         vm_map_lookup_done(map, entry);
 2791                                         return 0;
 2792                                 }
 2793                                 if ((m->flags & PG_BUSY) ||
 2794                                         ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) {
 2795                                         vm_map_lookup_done(map, entry);
 2796                                         return 0;
 2797                                 }
 2798                         }
 2799                 }
 2800 
 2801 /*
 2802  * If we are changing an existing map entry, just redirect
 2803  * the object, and change mappings.
 2804  */
 2805                 if ((first_object->type == OBJT_VNODE) &&
 2806                         ((oldobject = entry->object.vm_object) == first_object)) {
 2807 
 2808                         if ((entry->offset != cp) || (oldobject != srcobject)) {
 2809                                 /*
 2810                                  * Remove old window into the file
 2811                                  */
 2812                                 pmap_remove (map->pmap, uaddr, tend);
 2813 
 2814                                 /*
 2815                                  * Force copy-on-write for mmapped regions
 2816                                  */
 2817                                 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
 2818 
 2819                                 /*
 2820                                  * Point the object appropriately
 2821                                  */
 2822                                 if (oldobject != srcobject) {
 2823 
 2824                                         /*
 2825                                          * Set the object optimization hint flag
 2826                                          */
 2827                                         vm_object_set_flag(srcobject, OBJ_OPT);
 2828                                         vm_object_reference(srcobject);
 2829                                         entry->object.vm_object = srcobject;
 2830 
 2831                                         if (oldobject) {
 2832                                                 vm_object_deallocate(oldobject);
 2833                                         }
 2834                                 }
 2835 
 2836                                 entry->offset = cp;
 2837                                 map->timestamp++;
 2838                         } else {
 2839                                 pmap_remove (map->pmap, uaddr, tend);
 2840                         }
 2841 
 2842                 } else if ((first_object->ref_count == 1) &&
 2843                         (first_object->size == osize) &&
 2844                         ((first_object->type == OBJT_DEFAULT) ||
 2845                                 (first_object->type == OBJT_SWAP)) ) {
 2846 
 2847                         oldobject = first_object->backing_object;
 2848 
 2849                         if ((first_object->backing_object_offset != cp) ||
 2850                                 (oldobject != srcobject)) {
 2851                                 /*
 2852                                  * Remove old window into the file
 2853                                  */
 2854                                 pmap_remove (map->pmap, uaddr, tend);
 2855 
 2856                                 /*
 2857                                  * Remove unneeded old pages
 2858                                  */
 2859                                 if (first_object->resident_page_count) {
 2860                                         vm_object_page_remove (first_object, 0, 0, 0);
 2861                                 }
 2862 
 2863                                 /*
 2864                                  * Invalidate swap space
 2865                                  */
 2866                                 if (first_object->type == OBJT_SWAP) {
 2867                                         swap_pager_freespace(first_object,
 2868                                                 OFF_TO_IDX(first_object->paging_offset),
 2869                                                 first_object->size);
 2870                                 }
 2871 
 2872                                 /*
 2873                                  * Force copy-on-write for mmapped regions
 2874                                  */
 2875                                 vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
 2876 
 2877                                 /*
 2878                                  * Point the object appropriately
 2879                                  */
 2880                                 if (oldobject != srcobject) {
 2881 
 2882                                         /*
 2883                                          * Set the object optimization hint flag
 2884                                          */
 2885                                         vm_object_set_flag(srcobject, OBJ_OPT);
 2886                                         vm_object_reference(srcobject);
 2887 
 2888                                         if (oldobject) {
 2889                                                 TAILQ_REMOVE(&oldobject->shadow_head,
 2890                                                         first_object, shadow_list);
 2891                                                 oldobject->shadow_count--;
 2892                                                 vm_object_deallocate(oldobject);
 2893                                         }
 2894 
 2895                                         TAILQ_INSERT_TAIL(&srcobject->shadow_head,
 2896                                                 first_object, shadow_list);
 2897                                         srcobject->shadow_count++;
 2898 
 2899                                         first_object->backing_object = srcobject;
 2900                                 }
 2901                                 first_object->backing_object_offset = cp;
 2902                                 map->timestamp++;
 2903                         } else {
 2904                                 pmap_remove (map->pmap, uaddr, tend);
 2905                         }
 2906 /*
 2907  * Otherwise, we have to do a logical mmap.
 2908  */
 2909                 } else {
 2910 
 2911                         vm_object_set_flag(srcobject, OBJ_OPT);
 2912                         vm_object_reference(srcobject);
 2913 
 2914                         pmap_remove (map->pmap, uaddr, tend);
 2915 
 2916                         vm_object_pmap_copy_1 (srcobject, oindex, oindex + osize);
 2917                         vm_map_lock_upgrade(map);
 2918 
 2919                         if (entry == &map->header) {
 2920                                 map->first_free = &map->header;
 2921                         } else if (map->first_free->start >= start) {
 2922                                 map->first_free = entry->prev;
 2923                         }
 2924 
 2925                         SAVE_HINT(map, entry->prev);
 2926                         vm_map_entry_delete(map, entry);
 2927 
 2928                         object = srcobject;
 2929                         ooffset = cp;
 2930 #if 0
 2931                         vm_object_shadow(&object, &ooffset, osize);
 2932 #endif
 2933 
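                        /*
                         * Insert a copy-on-write mapping of the source object
                         * over the window; a later user write faults and
                         * shadows the pages instead of modifying the source.
                         */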
 2934                         rv = vm_map_insert(map, object, ooffset, start, tend,
 2935                                 VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE|MAP_COPY_NEEDED);
 2936 
 2937                         if (rv != KERN_SUCCESS)
 2938                                 panic("vm_uiomove: could not insert new entry: %d", rv);
 2939                 }
 2940 
 2941 /*
 2942  * Map the window directly, if it is already in memory
 2943  */
 2944                 pmap_object_init_pt(map->pmap, uaddr,
 2945                         srcobject, oindex, tcnt, 0);
 2946 
 2947                 map->timestamp++;
 2948                 vm_map_unlock(map);
 2949 
 2950                 cnt -= tcnt;
 2951                 uaddr += tcnt;
 2952                 cp += tcnt;
 2953                 if (npages)
 2954                         *npages += osize;
 2955         }
 2956         return 0;
 2957 }
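
/*
 * Illustrative sketch only (not part of the original file): one way a caller
 * might attempt the zero-copy path provided by vm_uiomove() and fall back to
 * an ordinary copy when no pages could be moved.  All names are hypothetical;
 * kept under #if 0, matching this file's convention for inactive code.
 */
#if 0
static int
vm_uiomove_example(vm_map_t map, vm_object_t srcobject, off_t foff,
    vm_offset_t uaddr, int len)
{
        int npages = 0;
        int error;

        error = vm_uiomove(map, srcobject, foff, len, uaddr, &npages);
        if (error)
                return (error);         /* EFAULT: bad user address */
        if (npages == 0)
                return (EAGAIN);        /* caller should do a plain copy */
        return (0);
}
#endif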
 2958 
 2959 /*
 2960  * Performs the copy-on-write operations necessary to allow the virtual copies
 2961  * into user space to work.  This has to be called for write(2) system calls
 2962  * from other processes, for file unlinking, and for file size shrinkage.
 2963  */
 2964 void
 2965 vm_freeze_copyopts(object, froma, toa)
 2966         vm_object_t object;
 2967         vm_pindex_t froma, toa;
 2968 {
 2969         int rv;
 2970         vm_object_t robject;
 2971         vm_pindex_t idx;
 2972 
 2973         if ((object == NULL) ||
 2974                 ((object->flags & OBJ_OPT) == 0))
 2975                 return;
 2976 
 2977         if (object->shadow_count > object->ref_count)
 2978                 panic("vm_freeze_copyopts: sc > rc");
 2979 
 2980         while ((robject = TAILQ_FIRST(&object->shadow_head)) != NULL) {
 2981                 vm_pindex_t bo_pindex;
 2982                 vm_page_t m_in, m_out;
 2983 
 2984                 bo_pindex = OFF_TO_IDX(robject->backing_object_offset);
 2985 
 2986                 vm_object_reference(robject);
 2987 
 2988                 vm_object_pip_wait(robject, "objfrz");
 2989 
 2990                 if (robject->ref_count == 1) {
 2991                         vm_object_deallocate(robject);
 2992                         continue;
 2993                 }
 2994 
 2995                 vm_object_pip_add(robject, 1);
 2996 
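                /*
                 * Give the shadow object its own copy of every page it has
                 * not yet copied, so it no longer depends on the pages of
                 * the object being frozen.
                 */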
 2997                 for (idx = 0; idx < robject->size; idx++) {
 2998 
 2999                         m_out = vm_page_grab(robject, idx,
 3000                                                 VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 3001 
 3002                         if (m_out->valid == 0) {
 3003                                 m_in = vm_page_grab(object, bo_pindex + idx,
 3004                                                 VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 3005                                 if (m_in->valid == 0) {
 3006                                         rv = vm_pager_get_pages(object, &m_in, 1, 0);
 3007                                         if (rv != VM_PAGER_OK) {
 3008                                                 printf("vm_freeze_copyopts: cannot read page from file: 0x%lx\n", (u_long)m_in->pindex);
 3009                                                 continue;
 3010                                         }
 3011                                         vm_page_deactivate(m_in);
 3012                                 }
 3013 
 3014                                 vm_page_protect(m_in, VM_PROT_NONE);
 3015                                 pmap_copy_page(VM_PAGE_TO_PHYS(m_in), VM_PAGE_TO_PHYS(m_out));
 3016                                 m_out->valid = m_in->valid;
 3017                                 m_out->dirty = VM_PAGE_BITS_ALL;
 3018 
 3019                                 vm_page_activate(m_out);
 3020 
 3021                                 vm_page_wakeup(m_in);
 3022                         }
 3023                         vm_page_wakeup(m_out);
 3024                 }
 3025 
 3026                 object->shadow_count--;
 3027                 object->ref_count--;
 3028                 TAILQ_REMOVE(&object->shadow_head, robject, shadow_list);
 3029                 robject->backing_object = NULL;
 3030                 robject->backing_object_offset = 0;
 3031 
 3032                 vm_object_pip_wakeup(robject);
 3033                 vm_object_deallocate(robject);
 3034         }
 3035 
 3036         vm_object_clear_flag(object, OBJ_OPT);
 3037 }
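
/*
 * Illustrative sketch only (not part of the original file): a writer would
 * freeze outstanding copy-on-write optimizations over the affected range
 * before dirtying the object's pages.  The helper name is hypothetical;
 * kept under #if 0, matching this file's convention for inactive code.
 */
#if 0
static void
vm_freeze_before_write(vm_object_t object, off_t offset, size_t len)
{
        vm_pindex_t first = OFF_TO_IDX(offset);
        vm_pindex_t last = OFF_TO_IDX(offset + len + PAGE_MASK);

        vm_freeze_copyopts(object, first, last);
}
#endif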
 3038 
 3039 #include "opt_ddb.h"
 3040 #ifdef DDB
 3041 #include <sys/kernel.h>
 3042 
 3043 #include <ddb/ddb.h>
 3044 
 3045 /*
 3046  *      vm_map_print:   [ debug ]
 3047  */
 3048 DB_SHOW_COMMAND(map, vm_map_print)
 3049 {
 3050         static int nlines;
 3051         /* XXX convert args. */
 3052         vm_map_t map = (vm_map_t)addr;
 3053         boolean_t full = have_addr;
 3054 
 3055         vm_map_entry_t entry;
 3056 
 3057         db_iprintf("%s map %p: pmap=%p, nentries=%d, version=%u\n",
 3058             (map->is_main_map ? "Task" : "Share"), (void *)map,
 3059             (void *)map->pmap, map->nentries, map->timestamp);
 3060         nlines++;
 3061 
 3062         if (!full && db_indent)
 3063                 return;
 3064 
 3065         db_indent += 2;
 3066         for (entry = map->header.next; entry != &map->header;
 3067             entry = entry->next) {
 3068 #if 0
 3069                 if (nlines > 18) {
 3070                         db_printf("--More--");
 3071                         cngetc();
 3072                         db_printf("\r");
 3073                         nlines = 0;
 3074                 }
 3075 #endif
 3076                 
 3077                 db_iprintf("map entry %p: start=%p, end=%p\n",
 3078                     (void *)entry, (void *)entry->start, (void *)entry->end);
 3079                 nlines++;
 3080                 if (map->is_main_map) {
 3081                         static char *inheritance_name[4] =
 3082                         {"share", "copy", "none", "donate_copy"};
 3083 
 3084                         db_iprintf(" prot=%x/%x/%s",
 3085                             entry->protection,
 3086                             entry->max_protection,
 3087                             inheritance_name[entry->inheritance]);
 3088                         if (entry->wired_count != 0)
 3089                                 db_printf(", wired");
 3090                 }
 3091                 if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) {
 3092                         /* XXX no %qd in kernel.  Truncate entry->offset. */
 3093                         db_printf(", share=%p, offset=0x%lx\n",
 3094                             (void *)entry->object.share_map,
 3095                             (long)entry->offset);
 3096                         nlines++;
 3097                         if ((entry->prev == &map->header) ||
 3098                             ((entry->prev->eflags & MAP_ENTRY_IS_A_MAP) == 0) ||
 3099                             (entry->prev->object.share_map !=
 3100                                 entry->object.share_map)) {
 3101                                 db_indent += 2;
 3102                                 vm_map_print((db_expr_t)(intptr_t)
 3103                                              entry->object.share_map,
 3104                                              full, 0, (char *)0);
 3105                                 db_indent -= 2;
 3106                         }
 3107                 } else {
 3108                         /* XXX no %qd in kernel.  Truncate entry->offset. */
 3109                         db_printf(", object=%p, offset=0x%lx",
 3110                             (void *)entry->object.vm_object,
 3111                             (long)entry->offset);
 3112                         if (entry->eflags & MAP_ENTRY_COW)
 3113                                 db_printf(", copy (%s)",
 3114                                     (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
 3115                         db_printf("\n");
 3116                         nlines++;
 3117 
 3118                         if ((entry->prev == &map->header) ||
 3119                             (entry->prev->eflags & MAP_ENTRY_IS_A_MAP) ||
 3120                             (entry->prev->object.vm_object !=
 3121                                 entry->object.vm_object)) {
 3122                                 db_indent += 2;
 3123                                 vm_object_print((db_expr_t)(intptr_t)
 3124                                                 entry->object.vm_object,
 3125                                                 full, 0, (char *)0);
 3126                                 nlines += 4;
 3127                                 db_indent -= 2;
 3128                         }
 3129                 }
 3130         }
 3131         db_indent -= 2;
 3132         if (db_indent == 0)
 3133                 nlines = 0;
 3134 }
 3135 
 3136 
 3137 DB_SHOW_COMMAND(procvm, procvm)
 3138 {
 3139         struct proc *p;
 3140 
 3141         if (have_addr) {
 3142                 p = (struct proc *) addr;
 3143         } else {
 3144                 p = curproc;
 3145         }
 3146 
 3147         db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n",
 3148             (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map,
 3149             (void *)&p->p_vmspace->vm_pmap);
 3150 
 3151         vm_map_print((db_expr_t)(intptr_t)&p->p_vmspace->vm_map, 1, 0, NULL);
 3152 }
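
/*
 * From the DDB prompt these commands are invoked as "show map <addr>" and
 * "show procvm [<addr>]"; without an address, procvm dumps curproc.
 */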
 3153 
 3154 #endif /* DDB */
