FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_map.c

    1 /*
    2  * Copyright (c) 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  *
    5  * This code is derived from software contributed to Berkeley by
    6  * The Mach Operating System project at Carnegie-Mellon University.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. All advertising materials mentioning features or use of this software
   17  *    must display the following acknowledgement:
   18  *      This product includes software developed by the University of
   19  *      California, Berkeley and its contributors.
   20  * 4. Neither the name of the University nor the names of its contributors
   21  *    may be used to endorse or promote products derived from this software
   22  *    without specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  *      from: @(#)vm_map.c      8.3 (Berkeley) 1/12/94
   37  *
   38  *
   39  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
   40  * All rights reserved.
   41  *
   42  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
   43  *
   44  * Permission to use, copy, modify and distribute this software and
   45  * its documentation is hereby granted, provided that both the copyright
   46  * notice and this permission notice appear in all copies of the
   47  * software, derivative works or modified versions, and any portions
   48  * thereof, and that both notices appear in supporting documentation.
   49  *
   50  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   51  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   52  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   53  *
   54  * Carnegie Mellon requests users of this software to return to
   55  *
   56  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   57  *  School of Computer Science
   58  *  Carnegie Mellon University
   59  *  Pittsburgh PA 15213-3890
   60  *
   61  * any improvements or extensions that they make and grant Carnegie the
   62  * rights to redistribute these changes.
   63  *
   64  * $FreeBSD: src/sys/vm/vm_map.c,v 1.57.2.8 1999/09/05 08:24:28 peter Exp $
   65  */
   66 
   67 /*
   68  *      Virtual memory mapping module.
   69  */
   70 
   71 #include <sys/param.h>
   72 #include <sys/systm.h>
   73 #include <sys/malloc.h>
   74 #include <sys/proc.h>
   75 #include <sys/queue.h>
   76 #include <sys/vmmeter.h>
   77 #include <sys/mman.h>
   78 
   79 #include <vm/vm.h>
   80 #include <vm/vm_param.h>
   81 #include <vm/vm_prot.h>
   82 #include <vm/vm_inherit.h>
   83 #include <vm/lock.h>
   84 #include <vm/pmap.h>
   85 #include <vm/vm_map.h>
   86 #include <vm/vm_page.h>
   87 #include <vm/vm_object.h>
   88 #include <vm/vm_kern.h>
   89 #include <vm/vm_pager.h>
   90 #include <vm/vm_extern.h>
   91 #include <vm/default_pager.h>
   92 
   93 /*
   94  *      Virtual memory maps provide for the mapping, protection,
   95  *      and sharing of virtual memory objects.  In addition,
   96  *      this module provides for an efficient virtual copy of
   97  *      memory from one map to another.
   98  *
   99  *      Synchronization is required prior to most operations.
  100  *
  101  *      Maps consist of an ordered doubly-linked list of simple
  102  *      entries; a single hint is used to speed up lookups.
  103  *
  104  *      In order to properly represent the sharing of virtual
  105  *      memory regions among maps, the map structure is bi-level.
  106  *      Top-level ("address") maps refer to regions of sharable
  107  *      virtual memory.  These regions are implemented as
  108  *      ("sharing") maps, which then refer to the actual virtual
  109  *      memory objects.  When two address maps "share" memory,
  110  *      their top-level maps both have references to the same
  111  *      sharing map.  When memory is virtual-copied from one
  112  *      address map to another, the references in the sharing
  113  *      maps are actually copied -- no copying occurs at the
  114  *      virtual memory object level.
  115  *
   116  *      Since portions of maps are specified by start/end addresses,
  117  *      which may not align with existing map entries, all
  118  *      routines merely "clip" entries to these start/end values.
  119  *      [That is, an entry is split into two, bordering at a
  120  *      start or end value.]  Note that these clippings may not
  121  *      always be necessary (as the two resulting entries are then
  122  *      not changed); however, the clipping is done for convenience.
  123  *      No attempt is currently made to "glue back together" two
  124  *      abutting entries.
  125  *
  126  *      As mentioned above, virtual copy operations are performed
  127  *      by copying VM object references from one sharing map to
  128  *      another, and then marking both regions as copy-on-write.
  129  *      It is important to note that only one writeable reference
  130  *      to a VM object region exists in any map -- this means that
  131  *      shadow object creation can be delayed until a write operation
  132  *      occurs.
  133  */
  134 
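/*
 * Usage sketch (editorial addition, not part of the original file):
 * most routines below assume the caller holds the map lock, so the
 * typical calling pattern brackets an operation with the
 * vm_map_lock()/vm_map_unlock() macros from <vm/vm_map.h>.  The helper
 * name below is hypothetical.
 */
#if 0
static boolean_t
example_map_probe(map, addr)
        vm_map_t map;
        vm_offset_t addr;
{
        vm_map_entry_t entry;
        boolean_t found;

        vm_map_lock(map);               /* exclusive lock on the map */
        found = vm_map_lookup_entry(map, addr, &entry);
        vm_map_unlock(map);
        return (found);
}
#endif
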
  135 /*
  136  *      vm_map_startup:
  137  *
  138  *      Initialize the vm_map module.  Must be called before
  139  *      any other vm_map routines.
  140  *
  141  *      Map and entry structures are allocated from the general
  142  *      purpose memory pool with some exceptions:
  143  *
  144  *      - The kernel map and kmem submap are allocated statically.
  145  *      - Kernel map entries are allocated out of a static pool.
  146  *
  147  *      These restrictions are necessary since malloc() uses the
  148  *      maps and requires map entries.
  149  */
  150 
  151 vm_offset_t kentry_data;
  152 vm_size_t kentry_data_size;
  153 static vm_map_entry_t kentry_free;
  154 static vm_map_t kmap_free;
  155 extern char kstack[];
  156 extern int inmprotect;
  157 
  158 static int kentry_count;
  159 static vm_offset_t mapvm_start, mapvm, mapvmmax;
  160 static int mapvmpgcnt;
  161 
  162 static struct vm_map_entry *mappool;
  163 static int mappoolcnt;
  164 #define KENTRY_LOW_WATER 128
  165 
  166 static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t));
  167 static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t));
  168 static vm_map_entry_t vm_map_entry_create __P((vm_map_t));
  169 static void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t));
  170 static void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t));
  171 static void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t));
  172 static void vm_map_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t,
  173                 vm_map_entry_t));
  174 
  175 void
  176 vm_map_startup()
  177 {
  178         register int i;
  179         register vm_map_entry_t mep;
  180         vm_map_t mp;
  181 
  182         /*
  183          * Static map structures for allocation before initialization of
  184          * kernel map or kmem map.  vm_map_create knows how to deal with them.
  185          */
  186         kmap_free = mp = (vm_map_t) kentry_data;
  187         i = MAX_KMAP;
  188         while (--i > 0) {
  189                 mp->header.next = (vm_map_entry_t) (mp + 1);
  190                 mp++;
  191         }
  192         mp++->header.next = NULL;
  193 
  194         /*
  195          * Form a free list of statically allocated kernel map entries with
  196          * the rest.
  197          */
  198         kentry_free = mep = (vm_map_entry_t) mp;
  199         kentry_count = i = (kentry_data_size - MAX_KMAP * sizeof *mp) / sizeof *mep;
  200         while (--i > 0) {
  201                 mep->next = mep + 1;
  202                 mep++;
  203         }
  204         mep->next = NULL;
  205 }
  206 
  207 /*
  208  * Allocate a vmspace structure, including a vm_map and pmap,
  209  * and initialize those structures.  The refcnt is set to 1.
  210  * The remaining fields must be initialized by the caller.
  211  */
  212 struct vmspace *
  213 vmspace_alloc(min, max, pageable)
  214         vm_offset_t min, max;
  215         int pageable;
  216 {
  217         register struct vmspace *vm;
  218 
  219         if (mapvmpgcnt == 0 && mapvm == 0) {
  220                 mapvmpgcnt = (cnt.v_page_count * sizeof(struct vm_map_entry) + PAGE_SIZE - 1) / PAGE_SIZE;
  221                 mapvm_start = mapvm = kmem_alloc_pageable(kernel_map,
  222                         mapvmpgcnt * PAGE_SIZE);
  223                 mapvmmax = mapvm_start + mapvmpgcnt * PAGE_SIZE;
  224                 if (!mapvm)
  225                         mapvmpgcnt = 0;
  226         }
  227         MALLOC(vm, struct vmspace *, sizeof(struct vmspace), M_VMMAP, M_WAITOK);
  228         bzero(vm, (caddr_t) &vm->vm_startcopy - (caddr_t) vm);
  229         vm_map_init(&vm->vm_map, min, max, pageable);
  230         pmap_pinit(&vm->vm_pmap);
  231         vm->vm_map.pmap = &vm->vm_pmap;         /* XXX */
  232         vm->vm_refcnt = 1;
  233         return (vm);
  234 }
  235 
  236 void
  237 vmspace_free(vm)
  238         register struct vmspace *vm;
  239 {
  240 
  241         if (vm->vm_refcnt == 0)
  242                 panic("vmspace_free: attempt to free already freed vmspace");
  243 
  244         if (--vm->vm_refcnt == 0) {
  245 
  246                 /*
  247                  * Lock the map, to wait out all other references to it.
  248                  * Delete all of the mappings and pages they hold, then call
  249                  * the pmap module to reclaim anything left.
  250                  */
  251                 vm_map_lock(&vm->vm_map);
  252                 (void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
  253                     vm->vm_map.max_offset);
  254                 vm_map_unlock(&vm->vm_map);
  255 
  256                 while( vm->vm_map.ref_count != 1)
  257                         tsleep(&vm->vm_map.ref_count, PVM, "vmsfre", 0);
  258                 --vm->vm_map.ref_count;
  259                 vm_object_pmap_remove(vm->vm_upages_obj,
  260                         0, vm->vm_upages_obj->size);
  261                 vm_object_deallocate(vm->vm_upages_obj);
  262                 pmap_release(&vm->vm_pmap);
  263                 FREE(vm, M_VMMAP);
  264         } else {
  265                 wakeup(&vm->vm_map.ref_count);
  266         }
  267 }
  268 
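/*
 * Usage sketch (editorial addition, not part of the original file):
 * the vmspace life cycle implemented by vmspace_alloc()/vmspace_free().
 * The address bounds and the helper name are illustrative only.
 */
#if 0
static void
example_vmspace_lifecycle()
{
        struct vmspace *vm;

        /* returned with vm_refcnt == 1; map and pmap initialized */
        vm = vmspace_alloc(VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS, TRUE);
        vm->vm_refcnt++;                /* an additional user of the vmspace */
        vmspace_free(vm);               /* 2 -> 1: nothing is torn down yet */
        vmspace_free(vm);               /* 1 -> 0: map, pmap and upages freed */
}
#endif
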
  269 /*
  270  *      vm_map_create:
  271  *
  272  *      Creates and returns a new empty VM map with
  273  *      the given physical map structure, and having
  274  *      the given lower and upper address bounds.
  275  */
  276 vm_map_t
  277 vm_map_create(pmap, min, max, pageable)
  278         pmap_t pmap;
  279         vm_offset_t min, max;
  280         boolean_t pageable;
  281 {
  282         register vm_map_t result;
  283 
  284         if (kmem_map == NULL) {
  285                 result = kmap_free;
  286                 if (result == NULL)
  287                         panic("vm_map_create: out of maps");
  288                 kmap_free = (vm_map_t) result->header.next;
  289         } else
  290                 MALLOC(result, vm_map_t, sizeof(struct vm_map),
  291                     M_VMMAP, M_WAITOK);
  292 
  293         vm_map_init(result, min, max, pageable);
  294         result->pmap = pmap;
  295         return (result);
  296 }
  297 
  298 /*
  299  * Initialize an existing vm_map structure
  300  * such as that in the vmspace structure.
  301  * The pmap is set elsewhere.
  302  */
  303 void
  304 vm_map_init(map, min, max, pageable)
  305         register struct vm_map *map;
  306         vm_offset_t min, max;
  307         boolean_t pageable;
  308 {
  309         map->header.next = map->header.prev = &map->header;
  310         map->nentries = 0;
  311         map->size = 0;
  312         map->ref_count = 1;
  313         map->is_main_map = TRUE;
  314         map->min_offset = min;
  315         map->max_offset = max;
  316         map->entries_pageable = pageable;
  317         map->first_free = &map->header;
  318         map->hint = &map->header;
  319         map->timestamp = 0;
  320         lock_init(&map->lock, TRUE);
  321 }
  322 
  323 /*
  324  *      vm_map_entry_dispose:   [ internal use only ]
  325  *
  326  *      Inverse of vm_map_entry_create.
  327  */
  328 static void
  329 vm_map_entry_dispose(map, entry)
  330         vm_map_t map;
  331         vm_map_entry_t entry;
  332 {
  333         int s;
  334 
  335         if (map == kernel_map || map == kmem_map ||
  336                 map == mb_map || map == pager_map) {
  337                 s = splvm();
  338                 entry->next = kentry_free;
  339                 kentry_free = entry;
  340                 ++kentry_count;
  341                 splx(s);
  342         } else {
  343                 entry->next = mappool;
  344                 mappool = entry;
  345                 ++mappoolcnt;
  346         }
  347 }
  348 
  349 /*
  350  *      vm_map_entry_create:    [ internal use only ]
  351  *
  352  *      Allocates a VM map entry for insertion.
   353  *      No entry fields are filled in.  This routine is for internal use only.
  354  */
  355 static vm_map_entry_t
  356 vm_map_entry_create(map)
  357         vm_map_t map;
  358 {
  359         vm_map_entry_t entry;
  360         int i;
  361         int s;
  362 
  363         /*
  364          * This is a *very* nasty (and sort of incomplete) hack!!!!
  365          */
  366         if (kentry_count < KENTRY_LOW_WATER) {
  367                 s = splvm();
  368                 if (mapvmpgcnt && mapvm) {
  369                         vm_page_t m;
  370 
  371                         m = vm_page_alloc(kernel_object,
  372                                 OFF_TO_IDX(mapvm - VM_MIN_KERNEL_ADDRESS),
  373                                     (map == kmem_map || map == mb_map) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL);
  374 
  375                         if (m) {
  376                                 int newentries;
  377 
  378                                 newentries = (PAGE_SIZE / sizeof(struct vm_map_entry));
  379                                 vm_page_wire(m);
  380                                 PAGE_WAKEUP(m);
  381                                 m->valid = VM_PAGE_BITS_ALL;
  382                                 pmap_kenter(mapvm, VM_PAGE_TO_PHYS(m));
  383                                 m->flags |= PG_WRITEABLE;
  384 
  385                                 entry = (vm_map_entry_t) mapvm;
  386                                 mapvm += PAGE_SIZE;
  387                                 --mapvmpgcnt;
  388 
  389                                 for (i = 0; i < newentries; i++) {
  390                                         vm_map_entry_dispose(kernel_map, entry);
  391                                         entry++;
  392                                 }
  393                         }
  394                 }
  395                 splx(s);
  396         }
  397 
  398         if (map == kernel_map || map == kmem_map ||
  399                 map == mb_map || map == pager_map) {
  400                 s = splvm();
  401                 entry = kentry_free;
  402                 if (entry) {
  403                         kentry_free = entry->next;
  404                         --kentry_count;
  405                 } else {
  406                         panic("vm_map_entry_create: out of map entries for kernel");
  407                 }
  408                 splx(s);
  409         } else {
  410                 entry = mappool;
  411                 if (entry) {
  412                         mappool = entry->next;
  413                         --mappoolcnt;
  414                 } else {
  415                         MALLOC(entry, vm_map_entry_t, sizeof(struct vm_map_entry),
  416                             M_VMMAPENT, M_WAITOK);
  417                 }
  418         }
  419 
  420         return (entry);
  421 }
  422 
  423 /*
  424  *      vm_map_entry_{un,}link:
  425  *
  426  *      Insert/remove entries from maps.
  427  */
  428 #define vm_map_entry_link(map, after_where, entry) \
  429                 { \
  430                 (map)->nentries++; \
  431                 (entry)->prev = (after_where); \
  432                 (entry)->next = (after_where)->next; \
  433                 (entry)->prev->next = (entry); \
  434                 (entry)->next->prev = (entry); \
  435                 }
  436 #define vm_map_entry_unlink(map, entry) \
  437                 { \
  438                 (map)->nentries--; \
  439                 (entry)->next->prev = (entry)->prev; \
  440                 (entry)->prev->next = (entry)->next; \
  441                 }
  442 
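/*
 * Sketch (editorial addition, not part of the original file): the link
 * and unlink macros above splice entries into the circular, doubly
 * linked list rooted at map->header.  The names below are illustrative.
 */
#if 0
static void
example_entry_splice(map, prev, new_entry)
        vm_map_t map;
        vm_map_entry_t prev, new_entry;
{
        vm_map_entry_link(map, prev, new_entry);
        /*
         * Now: prev->next == new_entry, new_entry->prev == prev,
         * new_entry->next is the entry that used to follow prev, and that
         * entry's prev pointer refers to new_entry; nentries was bumped.
         */
        vm_map_entry_unlink(map, new_entry);    /* undoes the splice */
}
#endif
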
  443 /*
  444  *      vm_map_reference:
  445  *
  446  *      Creates another valid reference to the given map.
  447  *
  448  */
  449 void
  450 vm_map_reference(map)
  451         register vm_map_t map;
  452 {
  453         if (map == NULL)
  454                 return;
  455 
  456         map->ref_count++;
  457 }
  458 
  459 /*
  460  *      vm_map_deallocate:
  461  *
  462  *      Removes a reference from the specified map,
  463  *      destroying it if no references remain.
  464  *      The map should not be locked.
  465  */
  466 void
  467 vm_map_deallocate(map)
  468         register vm_map_t map;
  469 {
  470         register int c;
  471 
  472         if (map == NULL)
  473                 return;
  474 
  475         c = map->ref_count;
  476 
  477         if (c == 0)
  478                 panic("vm_map_deallocate: deallocating already freed map");
  479 
  480         if (c != 1) {
  481                 --map->ref_count;
  482                 wakeup(&map->ref_count);
  483                 return;
  484         }
  485         /*
  486          * Lock the map, to wait out all other references to it.
  487          */
  488 
  489         vm_map_lock(map);
  490         (void) vm_map_delete(map, map->min_offset, map->max_offset);
  491         --map->ref_count;
  492         if( map->ref_count != 0) {
  493                 vm_map_unlock(map);
  494                 return;
  495         }
  496 
  497         pmap_destroy(map->pmap);
  498         FREE(map, M_VMMAP);
  499 }
  500 
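/*
 * Usage sketch (editorial addition, not part of the original file):
 * map references taken with vm_map_reference() must be balanced by
 * vm_map_deallocate(); the final deallocation destroys the map.  The
 * map is not locked across these calls, per the comment above.
 */
#if 0
static void
example_map_refcount(map)
        vm_map_t map;
{
        vm_map_reference(map);          /* ref_count: n -> n + 1 */
        /* ... use the map ... */
        vm_map_deallocate(map);         /* ref_count: n + 1 -> n */
}
#endif
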
  501 /*
  502  *      SAVE_HINT:
  503  *
  504  *      Saves the specified entry as the hint for
  505  *      future lookups.
  506  */
  507 #define SAVE_HINT(map,value) \
  508                 (map)->hint = (value);
  509 
  510 /*
  511  *      vm_map_lookup_entry:    [ internal use only ]
  512  *
  513  *      Finds the map entry containing (or
  514  *      immediately preceding) the specified address
  515  *      in the given map; the entry is returned
  516  *      in the "entry" parameter.  The boolean
  517  *      result indicates whether the address is
  518  *      actually contained in the map.
  519  */
  520 boolean_t
  521 vm_map_lookup_entry(map, address, entry)
  522         register vm_map_t map;
  523         register vm_offset_t address;
  524         vm_map_entry_t *entry;  /* OUT */
  525 {
  526         register vm_map_entry_t cur;
  527         register vm_map_entry_t last;
  528 
  529         /*
  530          * Start looking either from the head of the list, or from the hint.
  531          */
  532 
  533         cur = map->hint;
  534 
  535         if (cur == &map->header)
  536                 cur = cur->next;
  537 
  538         if (address >= cur->start) {
  539                 /*
  540                  * Go from hint to end of list.
  541                  *
  542                  * But first, make a quick check to see if we are already looking
  543                  * at the entry we want (which is usually the case). Note also
  544                  * that we don't need to save the hint here... it is the same
  545                  * hint (unless we are at the header, in which case the hint
  546                  * didn't buy us anything anyway).
  547                  */
  548                 last = &map->header;
  549                 if ((cur != last) && (cur->end > address)) {
  550                         *entry = cur;
  551                         return (TRUE);
  552                 }
  553         } else {
  554                 /*
  555                  * Go from start to hint, *inclusively*
  556                  */
  557                 last = cur->next;
  558                 cur = map->header.next;
  559         }
  560 
  561         /*
  562          * Search linearly
  563          */
  564 
  565         while (cur != last) {
  566                 if (cur->end > address) {
  567                         if (address >= cur->start) {
  568                                 /*
  569                                  * Save this lookup for future hints, and
  570                                  * return
  571                                  */
  572 
  573                                 *entry = cur;
  574                                 SAVE_HINT(map, cur);
  575                                 return (TRUE);
  576                         }
  577                         break;
  578                 }
  579                 cur = cur->next;
  580         }
  581         *entry = cur->prev;
  582         SAVE_HINT(map, *entry);
  583         return (FALSE);
  584 }
  585 
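/*
 * Usage sketch (editorial addition, not part of the original file):
 * how callers interpret vm_map_lookup_entry().  TRUE means *entry
 * contains the address; FALSE means *entry is the entry immediately
 * preceding the address (possibly &map->header).  The map lock is
 * assumed to be held; the helper name is hypothetical.
 */
#if 0
static vm_map_entry_t
example_entry_at(map, addr)
        vm_map_t map;
        vm_offset_t addr;
{
        vm_map_entry_t entry;

        if (vm_map_lookup_entry(map, addr, &entry))
                return (entry);         /* addr falls inside this entry */
        return (NULL);                  /* addr lies in a gap after *entry */
}
#endif
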
  586 /*
  587  *      vm_map_insert:
  588  *
  589  *      Inserts the given whole VM object into the target
  590  *      map at the specified address range.  The object's
  591  *      size should match that of the address range.
  592  *
  593  *      Requires that the map be locked, and leaves it so.
  594  */
  595 int
  596 vm_map_insert(map, object, offset, start, end, prot, max, cow)
  597         vm_map_t map;
  598         vm_object_t object;
  599         vm_ooffset_t offset;
  600         vm_offset_t start;
  601         vm_offset_t end;
  602         vm_prot_t prot, max;
  603         int cow;
  604 {
  605         register vm_map_entry_t new_entry;
  606         register vm_map_entry_t prev_entry;
  607         vm_map_entry_t temp_entry;
  608         vm_object_t prev_object;
  609         u_char protoeflags;
  610 
  611         if ((object != NULL) && (cow & MAP_NOFAULT)) {
  612                 panic("vm_map_insert: paradoxical MAP_NOFAULT request");
  613         }
  614 
  615         /*
  616          * Check that the start and end points are not bogus.
  617          */
  618 
  619         if ((start < map->min_offset) || (end > map->max_offset) ||
  620             (start >= end))
  621                 return (KERN_INVALID_ADDRESS);
  622 
  623         /*
  624          * Find the entry prior to the proposed starting address; if it's part
  625          * of an existing entry, this range is bogus.
  626          */
  627 
  628         if (vm_map_lookup_entry(map, start, &temp_entry))
  629                 return (KERN_NO_SPACE);
  630 
  631         prev_entry = temp_entry;
  632 
  633         /*
  634          * Assert that the next entry doesn't overlap the end point.
  635          */
  636 
  637         if ((prev_entry->next != &map->header) &&
  638             (prev_entry->next->start < end))
  639                 return (KERN_NO_SPACE);
  640 
  641         protoeflags = 0;
  642         if (cow & MAP_COPY_NEEDED)
  643                 protoeflags |= MAP_ENTRY_NEEDS_COPY;
  644 
  645         if (cow & MAP_COPY_ON_WRITE)
  646                 protoeflags |= MAP_ENTRY_COW;
  647 
  648         if (cow & MAP_NOFAULT)
  649                 protoeflags |= MAP_ENTRY_NOFAULT;
  650 
  651         /*
  652          * See if we can avoid creating a new entry by extending one of our
  653          * neighbors.  Or at least extend the object.
  654          */
  655 
  656         if ((object == NULL) &&
  657             (prev_entry != &map->header) &&
  658             (( prev_entry->eflags & (MAP_ENTRY_IS_A_MAP | MAP_ENTRY_IS_SUB_MAP)) == 0) &&
  659             (prev_entry->end == start) &&
  660             (prev_entry->wired_count == 0)) {
  661                 
  662 
  663                 if ((protoeflags == prev_entry->eflags) &&
  664                     ((cow & MAP_NOFAULT) ||
  665                      vm_object_coalesce(prev_entry->object.vm_object,
  666                                         OFF_TO_IDX(prev_entry->offset),
  667                                         (vm_size_t) (prev_entry->end - prev_entry->start),
  668                                         (vm_size_t) (end - prev_entry->end)))) {
  669 
  670                         /*
  671                          * Coalesced the two objects.  Can we extend the
  672                          * previous map entry to include the new range?
  673                          */
  674                         if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
  675                             (prev_entry->protection == prot) &&
  676                             (prev_entry->max_protection == max)) {
  677 
  678                                 map->size += (end - prev_entry->end);
  679                                 prev_entry->end = end;
  680                                 if ((cow & MAP_NOFAULT) == 0) {
  681                                         prev_object = prev_entry->object.vm_object;
  682                                         default_pager_convert_to_swapq(prev_object);
  683                                 }
  684                                 return (KERN_SUCCESS);
  685                         }
  686                         else {
  687                                 object = prev_entry->object.vm_object;
  688                                 offset = prev_entry->offset + (prev_entry->end -
  689                                                                prev_entry->start);
  690 
  691                                 vm_object_reference(object);
  692                         }
  693                 }
  694         }
  695 
  696         /*
  697          * Create a new entry
  698          */
  699 
  700         new_entry = vm_map_entry_create(map);
  701         new_entry->start = start;
  702         new_entry->end = end;
  703 
  704         new_entry->eflags = protoeflags;
  705         new_entry->object.vm_object = object;
  706         new_entry->offset = offset;
  707 
  708         if (map->is_main_map) {
  709                 new_entry->inheritance = VM_INHERIT_DEFAULT;
  710                 new_entry->protection = prot;
  711                 new_entry->max_protection = max;
  712                 new_entry->wired_count = 0;
  713         }
  714         /*
  715          * Insert the new entry into the list
  716          */
  717 
  718         vm_map_entry_link(map, prev_entry, new_entry);
  719         map->size += new_entry->end - new_entry->start;
  720 
  721         /*
  722          * Update the free space hint
  723          */
  724         if ((map->first_free == prev_entry) &&
  725                 (prev_entry->end >= new_entry->start))
  726                 map->first_free = new_entry;
  727 
  728         default_pager_convert_to_swapq(object);
  729         return (KERN_SUCCESS);
  730 }
  731 
  732 /*
  733  * Find sufficient space for `length' bytes in the given map, starting at
  734  * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
  735  */
  736 int
  737 vm_map_findspace(map, start, length, addr)
  738         register vm_map_t map;
  739         register vm_offset_t start;
  740         vm_size_t length;
  741         vm_offset_t *addr;
  742 {
  743         register vm_map_entry_t entry, next;
  744         register vm_offset_t end;
  745 
  746         if (start < map->min_offset)
  747                 start = map->min_offset;
  748         if (start > map->max_offset)
  749                 return (1);
  750 
  751         /*
  752          * Look for the first possible address; if there's already something
  753          * at this address, we have to start after it.
  754          */
  755         if (start == map->min_offset) {
  756                 if ((entry = map->first_free) != &map->header)
  757                         start = entry->end;
  758         } else {
  759                 vm_map_entry_t tmp;
  760 
  761                 if (vm_map_lookup_entry(map, start, &tmp))
  762                         start = tmp->end;
  763                 entry = tmp;
  764         }
  765 
  766         /*
  767          * Look through the rest of the map, trying to fit a new region in the
  768          * gap between existing regions, or after the very last region.
  769          */
  770         for (;; start = (entry = next)->end) {
  771                 /*
  772                  * Find the end of the proposed new region.  Be sure we didn't
  773                  * go beyond the end of the map, or wrap around the address;
  774                  * if so, we lose.  Otherwise, if this is the last entry, or
  775                  * if the proposed new region fits before the next entry, we
  776                  * win.
  777                  */
  778                 end = start + length;
  779                 if (end > map->max_offset || end < start)
  780                         return (1);
  781                 next = entry->next;
  782                 if (next == &map->header || next->start >= end)
  783                         break;
  784         }
  785         SAVE_HINT(map, entry);
  786         *addr = start;
  787         if (map == kernel_map && round_page(start + length) > kernel_vm_end)
  788                 pmap_growkernel(round_page(start + length));
  789         return (0);
  790 }
  791 
  792 /*
  793  *      vm_map_find finds an unallocated region in the target address
  794  *      map with the given length.  The search is defined to be
  795  *      first-fit from the specified address; the region found is
  796  *      returned in the same parameter.
  797  *
  798  */
  799 int
  800 vm_map_find(map, object, offset, addr, length, find_space, prot, max, cow)
  801         vm_map_t map;
  802         vm_object_t object;
  803         vm_ooffset_t offset;
  804         vm_offset_t *addr;      /* IN/OUT */
  805         vm_size_t length;
  806         boolean_t find_space;
  807         vm_prot_t prot, max;
  808         int cow;
  809 {
  810         register vm_offset_t start;
  811         int result, s = 0;
  812 
  813         start = *addr;
  814 
  815         if (map == kmem_map || map == mb_map)
  816                 s = splvm();
  817 
  818         vm_map_lock(map);
  819         if (find_space) {
  820                 if (vm_map_findspace(map, start, length, addr)) {
  821                         vm_map_unlock(map);
  822                         if (map == kmem_map || map == mb_map)
  823                                 splx(s);
  824                         return (KERN_NO_SPACE);
  825                 }
  826                 start = *addr;
  827         }
  828         result = vm_map_insert(map, object, offset,
  829                 start, start + length, prot, max, cow);
  830         vm_map_unlock(map);
  831 
  832         if (map == kmem_map || map == mb_map)
  833                 splx(s);
  834 
  835         return (result);
  836 }
  837 
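/*
 * Usage sketch (editorial addition, not part of the original file):
 * allocating an anonymous, zero-fill range with vm_map_find(), in the
 * style of this interface's kmem and mmap callers.  The helper name is
 * made up; the return value is the KERN_* status from vm_map_find().
 */
#if 0
static int
example_find_anon(map, length, addrp)
        vm_map_t map;
        vm_size_t length;
        vm_offset_t *addrp;             /* OUT: chosen address */
{
        *addrp = vm_map_min(map);       /* start the first-fit search here */
        return (vm_map_find(map, NULL, (vm_ooffset_t) 0, addrp,
            round_page(length), TRUE, VM_PROT_ALL, VM_PROT_ALL, 0));
}
#endif
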
  838 /*
  839  *      vm_map_simplify_entry:
  840  *
  841  *      Simplify the given map entry by merging with either neighbor.
  842  */
  843 void
  844 vm_map_simplify_entry(map, entry)
  845         vm_map_t map;
  846         vm_map_entry_t entry;
  847 {
  848         vm_map_entry_t next, prev;
  849         vm_size_t prevsize, esize;
  850 
  851         if (entry->eflags & (MAP_ENTRY_IS_SUB_MAP|MAP_ENTRY_IS_A_MAP))
  852                 return;
  853 
  854         prev = entry->prev;
  855         if (prev != &map->header) {
  856                 prevsize = prev->end - prev->start;
  857                 if ( (prev->end == entry->start) &&
  858                      (prev->object.vm_object == entry->object.vm_object) &&
  859                      (!prev->object.vm_object || (prev->object.vm_object->behavior == entry->object.vm_object->behavior)) &&
  860                      (!prev->object.vm_object ||
  861                         (prev->offset + prevsize == entry->offset)) &&
  862                      (prev->eflags == entry->eflags) &&
  863                      (prev->protection == entry->protection) &&
  864                      (prev->max_protection == entry->max_protection) &&
  865                      (prev->inheritance == entry->inheritance) &&
  866                      (prev->wired_count == entry->wired_count)) {
  867                         if (map->first_free == prev)
  868                                 map->first_free = entry;
  869                         if (map->hint == prev)
  870                                 map->hint = entry;
  871                         vm_map_entry_unlink(map, prev);
  872                         entry->start = prev->start;
  873                         entry->offset = prev->offset;
  874                         if (prev->object.vm_object)
  875                                 vm_object_deallocate(prev->object.vm_object);
  876                         vm_map_entry_dispose(map, prev);
  877                 }
  878         }
  879 
  880         next = entry->next;
  881         if (next != &map->header) {
  882                 esize = entry->end - entry->start;
  883                 if ((entry->end == next->start) &&
  884                     (next->object.vm_object == entry->object.vm_object) &&
  885                     (!next->object.vm_object || (next->object.vm_object->behavior == entry->object.vm_object->behavior)) &&
  886                      (!entry->object.vm_object ||
  887                         (entry->offset + esize == next->offset)) &&
  888                     (next->eflags == entry->eflags) &&
  889                     (next->protection == entry->protection) &&
  890                     (next->max_protection == entry->max_protection) &&
  891                     (next->inheritance == entry->inheritance) &&
  892                     (next->wired_count == entry->wired_count)) {
  893                         if (map->first_free == next)
  894                                 map->first_free = entry;
  895                         if (map->hint == next)
  896                                 map->hint = entry;
  897                         vm_map_entry_unlink(map, next);
  898                         entry->end = next->end;
  899                         if (next->object.vm_object)
  900                                 vm_object_deallocate(next->object.vm_object);
  901                         vm_map_entry_dispose(map, next);
  902                 }
  903         }
  904 }
  905 /*
  906  *      vm_map_clip_start:      [ internal use only ]
  907  *
  908  *      Asserts that the given entry begins at or after
  909  *      the specified address; if necessary,
  910  *      it splits the entry into two.
  911  */
  912 #define vm_map_clip_start(map, entry, startaddr) \
  913 { \
  914         if (startaddr > entry->start) \
  915                 _vm_map_clip_start(map, entry, startaddr); \
  916 }
  917 
  918 /*
  919  *      This routine is called only when it is known that
  920  *      the entry must be split.
  921  */
  922 static void
  923 _vm_map_clip_start(map, entry, start)
  924         register vm_map_t map;
  925         register vm_map_entry_t entry;
  926         register vm_offset_t start;
  927 {
  928         register vm_map_entry_t new_entry;
  929 
  930         /*
  931          * Split off the front portion -- note that we must insert the new
  932          * entry BEFORE this one, so that this entry has the specified
  933          * starting address.
  934          */
  935 
  936         vm_map_simplify_entry(map, entry);
  937 
  938         /*
  939          * If there is no object backing this entry, we might as well create
  940          * one now.  If we defer it, an object can get created after the map
  941          * is clipped, and individual objects will be created for the split-up
  942          * map.  This is a bit of a hack, but is also about the best place to
  943          * put this improvement.
  944          */
  945 
  946         if (entry->object.vm_object == NULL) {
  947                         vm_object_t object;
  948 
  949                         object = vm_object_allocate(OBJT_DEFAULT,
  950                                         OFF_TO_IDX(entry->end - entry->start));
  951                         entry->object.vm_object = object;
  952                         entry->offset = 0;
  953         }
  954 
  955         new_entry = vm_map_entry_create(map);
  956         *new_entry = *entry;
  957 
  958         new_entry->end = start;
  959         entry->offset += (start - entry->start);
  960         entry->start = start;
  961 
  962         vm_map_entry_link(map, entry->prev, new_entry);
  963 
  964         if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP))
  965                 vm_map_reference(new_entry->object.share_map);
  966         else
  967                 vm_object_reference(new_entry->object.vm_object);
  968 }
  969 
  970 /*
  971  *      vm_map_clip_end:        [ internal use only ]
  972  *
  973  *      Asserts that the given entry ends at or before
  974  *      the specified address; if necessary,
  975  *      it splits the entry into two.
  976  */
  977 
  978 #define vm_map_clip_end(map, entry, endaddr) \
  979 { \
  980         if (endaddr < entry->end) \
  981                 _vm_map_clip_end(map, entry, endaddr); \
  982 }
  983 
  984 /*
  985  *      This routine is called only when it is known that
  986  *      the entry must be split.
  987  */
  988 static void
  989 _vm_map_clip_end(map, entry, end)
  990         register vm_map_t map;
  991         register vm_map_entry_t entry;
  992         register vm_offset_t end;
  993 {
  994         register vm_map_entry_t new_entry;
  995 
  996         /*
  997          * If there is no object backing this entry, we might as well create
  998          * one now.  If we defer it, an object can get created after the map
  999          * is clipped, and individual objects will be created for the split-up
 1000          * map.  This is a bit of a hack, but is also about the best place to
 1001          * put this improvement.
 1002          */
 1003 
 1004         if (entry->object.vm_object == NULL) {
 1005                         vm_object_t object;
 1006 
 1007                         object = vm_object_allocate(OBJT_DEFAULT,
 1008                                         OFF_TO_IDX(entry->end - entry->start));
 1009                         entry->object.vm_object = object;
 1010                         entry->offset = 0;
 1011         }
 1012 
 1013         /*
 1014          * Create a new entry and insert it AFTER the specified entry
 1015          */
 1016 
 1017         new_entry = vm_map_entry_create(map);
 1018         *new_entry = *entry;
 1019 
 1020         new_entry->start = entry->end = end;
 1021         new_entry->offset += (end - entry->start);
 1022 
 1023         vm_map_entry_link(map, entry, new_entry);
 1024 
 1025         if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP))
 1026                 vm_map_reference(new_entry->object.share_map);
 1027         else
 1028                 vm_object_reference(new_entry->object.vm_object);
 1029 }
 1030 
 1031 /*
 1032  *      VM_MAP_RANGE_CHECK:     [ internal use only ]
 1033  *
 1034  *      Asserts that the starting and ending region
 1035  *      addresses fall within the valid range of the map.
 1036  */
 1037 #define VM_MAP_RANGE_CHECK(map, start, end)             \
 1038                 {                                       \
 1039                 if (start < vm_map_min(map))            \
 1040                         start = vm_map_min(map);        \
 1041                 if (end > vm_map_max(map))              \
 1042                         end = vm_map_max(map);          \
 1043                 if (start > end)                        \
 1044                         start = end;                    \
 1045                 }
 1046 
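/*
 * Sketch (editorial addition, not part of the original file): the clip
 * macros and VM_MAP_RANGE_CHECK are always used in the same pattern by
 * the range operations that follow (submap, protect, madvise, inherit,
 * pageable): look up the first entry, clip its start, then walk forward
 * clipping each entry's end at the range boundary.  Map lock assumed held.
 */
#if 0
static void
example_range_walk(map, start, end)
        vm_map_t map;
        vm_offset_t start, end;
{
        vm_map_entry_t entry;

        VM_MAP_RANGE_CHECK(map, start, end);
        if (vm_map_lookup_entry(map, start, &entry))
                vm_map_clip_start(map, entry, start);
        else
                entry = entry->next;
        while ((entry != &map->header) && (entry->start < end)) {
                vm_map_clip_end(map, entry, end);
                /* ... operate on an entry now exactly inside [start, end) ... */
                entry = entry->next;
        }
}
#endif
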
 1047 /*
 1048  *      vm_map_submap:          [ kernel use only ]
 1049  *
 1050  *      Mark the given range as handled by a subordinate map.
 1051  *
 1052  *      This range must have been created with vm_map_find,
 1053  *      and no other operations may have been performed on this
 1054  *      range prior to calling vm_map_submap.
 1055  *
 1056  *      Only a limited number of operations can be performed
  1057  *      within this range after calling vm_map_submap:
 1058  *              vm_fault
 1059  *      [Don't try vm_map_copy!]
 1060  *
 1061  *      To remove a submapping, one must first remove the
 1062  *      range from the superior map, and then destroy the
 1063  *      submap (if desired).  [Better yet, don't try it.]
 1064  */
 1065 int
 1066 vm_map_submap(map, start, end, submap)
 1067         register vm_map_t map;
 1068         register vm_offset_t start;
 1069         register vm_offset_t end;
 1070         vm_map_t submap;
 1071 {
 1072         vm_map_entry_t entry;
 1073         register int result = KERN_INVALID_ARGUMENT;
 1074 
 1075         vm_map_lock(map);
 1076 
 1077         VM_MAP_RANGE_CHECK(map, start, end);
 1078 
 1079         if (vm_map_lookup_entry(map, start, &entry)) {
 1080                 vm_map_clip_start(map, entry, start);
 1081         } else
 1082                 entry = entry->next;
 1083 
 1084         vm_map_clip_end(map, entry, end);
 1085 
 1086         if ((entry->start == start) && (entry->end == end) &&
 1087             ((entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_COW)) == 0) &&
 1088             (entry->object.vm_object == NULL)) {
 1089                 entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
 1090                 vm_map_reference(entry->object.sub_map = submap);
 1091                 result = KERN_SUCCESS;
 1092         }
 1093         vm_map_unlock(map);
 1094 
 1095         return (result);
 1096 }
 1097 
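/*
 * Usage sketch (editorial addition, not part of the original file): the
 * protocol described above -- reserve the range with vm_map_find()
 * first, then hand it to the subordinate map.  Names are illustrative.
 */
#if 0
static int
example_install_submap(parent, submap, size, addrp)
        vm_map_t parent, submap;
        vm_size_t size;
        vm_offset_t *addrp;
{
        int rv;

        *addrp = vm_map_min(parent);
        rv = vm_map_find(parent, NULL, (vm_ooffset_t) 0, addrp, size,
            TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
        if (rv != KERN_SUCCESS)
                return (rv);
        return (vm_map_submap(parent, *addrp, *addrp + size, submap));
}
#endif
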
 1098 /*
 1099  *      vm_map_protect:
 1100  *
 1101  *      Sets the protection of the specified address
 1102  *      region in the target map.  If "set_max" is
 1103  *      specified, the maximum protection is to be set;
 1104  *      otherwise, only the current protection is affected.
 1105  */
 1106 int
 1107 vm_map_protect(map, start, end, new_prot, set_max)
 1108         register vm_map_t map;
 1109         register vm_offset_t start;
 1110         register vm_offset_t end;
 1111         register vm_prot_t new_prot;
 1112         register boolean_t set_max;
 1113 {
 1114         register vm_map_entry_t current;
 1115         vm_map_entry_t entry;
 1116 
 1117         vm_map_lock(map);
 1118 
 1119         VM_MAP_RANGE_CHECK(map, start, end);
 1120 
 1121         if (vm_map_lookup_entry(map, start, &entry)) {
 1122                 vm_map_clip_start(map, entry, start);
 1123         } else {
 1124                 entry = entry->next;
 1125         }
 1126 
 1127         /*
 1128          * Make a first pass to check for protection violations.
 1129          */
 1130 
 1131         current = entry;
 1132         while ((current != &map->header) && (current->start < end)) {
 1133                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
 1134                         vm_map_unlock(map);
 1135                         return (KERN_INVALID_ARGUMENT);
 1136                 }
 1137                 if ((new_prot & current->max_protection) != new_prot) {
 1138                         vm_map_unlock(map);
 1139                         return (KERN_PROTECTION_FAILURE);
 1140                 }
 1141                 current = current->next;
 1142         }
 1143 
 1144         /*
 1145          * Go back and fix up protections. [Note that clipping is not
 1146          * necessary the second time.]
 1147          */
 1148 
 1149         current = entry;
 1150 
 1151         while ((current != &map->header) && (current->start < end)) {
 1152                 vm_prot_t old_prot;
 1153 
 1154                 vm_map_clip_end(map, current, end);
 1155 
 1156                 old_prot = current->protection;
 1157                 if (set_max)
 1158                         current->protection =
 1159                             (current->max_protection = new_prot) &
 1160                             old_prot;
 1161                 else
 1162                         current->protection = new_prot;
 1163 
 1164                 /*
 1165                  * Update physical map if necessary. Worry about copy-on-write
 1166                  * here -- CHECK THIS XXX
 1167                  */
 1168 
 1169                 if (current->protection != old_prot) {
 1170 #define MASK(entry)     (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
 1171                                                         VM_PROT_ALL)
 1172 #define max(a,b)        ((a) > (b) ? (a) : (b))
 1173 
 1174                         if (current->eflags & MAP_ENTRY_IS_A_MAP) {
 1175                                 vm_map_entry_t share_entry;
 1176                                 vm_offset_t share_end;
 1177 
 1178                                 vm_map_lock(current->object.share_map);
 1179                                 (void) vm_map_lookup_entry(
 1180                                     current->object.share_map,
 1181                                     current->offset,
 1182                                     &share_entry);
 1183                                 share_end = current->offset +
 1184                                     (current->end - current->start);
 1185                                 while ((share_entry !=
 1186                                         &current->object.share_map->header) &&
 1187                                     (share_entry->start < share_end)) {
 1188 
 1189                                         pmap_protect(map->pmap,
 1190                                             (max(share_entry->start,
 1191                                                     current->offset) -
 1192                                                 current->offset +
 1193                                                 current->start),
 1194                                             min(share_entry->end,
 1195                                                 share_end) -
 1196                                             current->offset +
 1197                                             current->start,
 1198                                             current->protection &
 1199                                             MASK(share_entry));
 1200 
 1201                                         share_entry = share_entry->next;
 1202                                 }
 1203                                 vm_map_unlock(current->object.share_map);
 1204                         } else
 1205                                 pmap_protect(map->pmap, current->start,
 1206                                     current->end,
 1207                                     current->protection & MASK(entry));
 1208 #undef  max
 1209 #undef  MASK
 1210                 }
 1211 
 1212                 vm_map_simplify_entry(map, current);
 1213 
 1214                 current = current->next;
 1215         }
 1216 
 1217         vm_map_unlock(map);
 1218         return (KERN_SUCCESS);
 1219 }
 1220 
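/*
 * Usage sketch (editorial addition, not part of the original file): an
 * mprotect(2)-style call lowering the current protection on a page
 * range of a process's map.  The helper name is hypothetical.
 */
#if 0
static int
example_protect_readonly(p, addr, len)
        struct proc *p;
        vm_offset_t addr;
        vm_size_t len;
{
        return (vm_map_protect(&p->p_vmspace->vm_map, trunc_page(addr),
            round_page(addr + len), VM_PROT_READ, FALSE));
}
#endif
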
 1221 /*
 1222  *      vm_map_madvise:
 1223  *
  1224  *      This routine traverses a process's map, handling the madvise
 1225  *      system call.
 1226  */
 1227 void
 1228 vm_map_madvise(map, pmap, start, end, advise)
 1229         vm_map_t map;
 1230         pmap_t pmap;
 1231         vm_offset_t start, end;
 1232         int advise;
 1233 {
 1234         register vm_map_entry_t current;
 1235         vm_map_entry_t entry;
 1236 
 1237         vm_map_lock(map);
 1238 
 1239         VM_MAP_RANGE_CHECK(map, start, end);
 1240 
 1241         if (vm_map_lookup_entry(map, start, &entry)) {
 1242                 vm_map_clip_start(map, entry, start);
 1243         } else
 1244                 entry = entry->next;
 1245 
 1246         for(current = entry;
 1247                 (current != &map->header) && (current->start < end);
 1248                 current = current->next) {
 1249                 vm_size_t size = current->end - current->start;
 1250 
 1251                 if (current->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) {
 1252                         continue;
 1253                 }
 1254 
 1255                 /*
 1256                  * Create an object if needed
 1257                  */
 1258                 if (current->object.vm_object == NULL) {
 1259                         vm_object_t object;
 1260                         object = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(size));
 1261                         current->object.vm_object = object;
 1262                         current->offset = 0;
 1263                 }
 1264 
 1265                 vm_map_clip_end(map, current, end);
 1266                 switch (advise) {
  1267                 case MADV_NORMAL:
 1268                         current->object.vm_object->behavior = OBJ_NORMAL;
 1269                         break;
  1270                 case MADV_SEQUENTIAL:
 1271                         current->object.vm_object->behavior = OBJ_SEQUENTIAL;
 1272                         break;
  1273                 case MADV_RANDOM:
 1274                         current->object.vm_object->behavior = OBJ_RANDOM;
 1275                         break;
  1276                 /*
  1277                  * Right now, we could handle DONTNEED and WILLNEED with common code.
  1278                  * They are mostly the same, except for the potential async reads (NYI).
  1279                  */
  1280                 case MADV_FREE:
  1281                 case MADV_DONTNEED:
 1282                         {
 1283                                 vm_pindex_t pindex;
 1284                                 int count;
 1285                                 size = current->end - current->start;
  1286                                 pindex = OFF_TO_IDX(current->offset);
 1287                                 count = OFF_TO_IDX(size);
 1288                                 /*
 1289                                  * MADV_DONTNEED removes the page from all
 1290                                  * pmaps, so pmap_remove is not necessary.
 1291                                  */
 1292                                 vm_object_madvise(current->object.vm_object,
 1293                                         pindex, count, advise);
 1294                         }
 1295                         break;
 1296 
  1297                 case MADV_WILLNEED:
 1298                         {
 1299                                 vm_pindex_t pindex;
 1300                                 int count;
 1301                                 size = current->end - current->start;
 1302                                 pindex = OFF_TO_IDX(current->offset);
 1303                                 count = OFF_TO_IDX(size);
 1304                                 vm_object_madvise(current->object.vm_object,
 1305                                         pindex, count, advise);
 1306                                 pmap_object_init_pt(pmap, current->start,
 1307                                         current->object.vm_object, pindex,
 1308                                         (count << PAGE_SHIFT), 0);
 1309                         }
 1310                         break;
 1311 
  1312                 default:
 1313                         break;
 1314                 }
 1315         }
 1316 
 1317         vm_map_simplify_entry(map, entry);
 1318         vm_map_unlock(map);
 1319         return;
 1320 }       
 1321 
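/*
 * Usage sketch (editorial addition, not part of the original file): how
 * a madvise(2)-style caller might hand a sequential-access hint to the
 * routine above.  The helper name is made up.
 */
#if 0
static void
example_advise_sequential(p, addr, len)
        struct proc *p;
        vm_offset_t addr;
        vm_size_t len;
{
        vm_map_madvise(&p->p_vmspace->vm_map, &p->p_vmspace->vm_pmap,
            trunc_page(addr), round_page(addr + len), MADV_SEQUENTIAL);
}
#endif
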
 1322 
 1323 /*
 1324  *      vm_map_inherit:
 1325  *
 1326  *      Sets the inheritance of the specified address
 1327  *      range in the target map.  Inheritance
 1328  *      affects how the map will be shared with
 1329  *      child maps at the time of vm_map_fork.
 1330  */
 1331 int
 1332 vm_map_inherit(map, start, end, new_inheritance)
 1333         register vm_map_t map;
 1334         register vm_offset_t start;
 1335         register vm_offset_t end;
 1336         register vm_inherit_t new_inheritance;
 1337 {
 1338         register vm_map_entry_t entry;
 1339         vm_map_entry_t temp_entry;
 1340 
 1341         switch (new_inheritance) {
 1342         case VM_INHERIT_NONE:
 1343         case VM_INHERIT_COPY:
 1344         case VM_INHERIT_SHARE:
 1345                 break;
 1346         default:
 1347                 return (KERN_INVALID_ARGUMENT);
 1348         }
 1349 
 1350         vm_map_lock(map);
 1351 
 1352         VM_MAP_RANGE_CHECK(map, start, end);
 1353 
 1354         if (vm_map_lookup_entry(map, start, &temp_entry)) {
 1355                 entry = temp_entry;
 1356                 vm_map_clip_start(map, entry, start);
 1357         } else
 1358                 entry = temp_entry->next;
 1359 
 1360         while ((entry != &map->header) && (entry->start < end)) {
 1361                 vm_map_clip_end(map, entry, end);
 1362 
 1363                 entry->inheritance = new_inheritance;
 1364 
 1365                 entry = entry->next;
 1366         }
 1367 
 1368         vm_map_simplify_entry(map, temp_entry);
 1369         vm_map_unlock(map);
 1370         return (KERN_SUCCESS);
 1371 }
 1372 
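/*
 * Usage sketch (editorial addition, not part of the original file): a
 * minherit(2)-style call marking a range as shared with children
 * created at vm_map_fork time.  The helper name is hypothetical.
 */
#if 0
static int
example_inherit_share(p, addr, len)
        struct proc *p;
        vm_offset_t addr;
        vm_size_t len;
{
        return (vm_map_inherit(&p->p_vmspace->vm_map, trunc_page(addr),
            round_page(addr + len), VM_INHERIT_SHARE));
}
#endif
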
 1373 /*
 1374  * Implement the semantics of mlock
 1375  */
 1376 int
 1377 vm_map_user_pageable(map, start, end, new_pageable)
 1378         register vm_map_t map;
 1379         register vm_offset_t start;
 1380         register vm_offset_t end;
 1381         register boolean_t new_pageable;
 1382 {
 1383         register vm_map_entry_t entry;
 1384         vm_map_entry_t start_entry;
 1385         register vm_offset_t failed = 0;
 1386         int rv;
 1387 
 1388         vm_map_lock(map);
 1389         VM_MAP_RANGE_CHECK(map, start, end);
 1390 
 1391         if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
 1392                 vm_map_unlock(map);
 1393                 return (KERN_INVALID_ADDRESS);
 1394         }
 1395 
 1396         if (new_pageable) {
 1397 
 1398                 entry = start_entry;
 1399                 vm_map_clip_start(map, entry, start);
 1400 
 1401                 /*
 1402                  * Now decrement the wiring count for each region. If a region
 1403                  * becomes completely unwired, unwire its physical pages and
 1404                  * mappings.
 1405                  */
 1406                 lock_set_recursive(&map->lock);
 1407 
 1408                 entry = start_entry;
 1409                 while ((entry != &map->header) && (entry->start < end)) {
 1410                         if (entry->eflags & MAP_ENTRY_USER_WIRED) {
 1411                                 vm_map_clip_end(map, entry, end);
 1412                                 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 1413                                 entry->wired_count--;
 1414                                 if (entry->wired_count == 0)
 1415                                         vm_fault_unwire(map, entry->start, entry->end);
 1416                         }
 1417                         entry = entry->next;
 1418                 }
 1419                 vm_map_simplify_entry(map, start_entry);
 1420                 lock_clear_recursive(&map->lock);
 1421         } else {
 1422 
 1423                 /*
 1424                  * Because of the possibility of blocking, etc., we
 1425                  * restart through the process's map entries from the
 1426                  * beginning so that we don't end up depending on a map
 1427                  * entry that could have changed.
 1428                  */
 1429         rescan:
 1430 
 1431                 entry = start_entry;
 1432 
 1433                 while ((entry != &map->header) && (entry->start < end)) {
 1434 
 1435                         if (entry->eflags & MAP_ENTRY_USER_WIRED) {
 1436                                 entry = entry->next;
 1437                                 continue;
 1438                         }
 1439                         
 1440                         if (entry->wired_count != 0) {
 1441                                 entry->wired_count++;
 1442                                 entry->eflags |= MAP_ENTRY_USER_WIRED;
 1443                                 entry = entry->next;
 1444                                 continue;
 1445                         }
 1446 
 1447                         /* At this point the entry is being newly wired. */
 1448 
 1449                         if ((entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) == 0) {
 1450                                 int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
 1451                                 if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) {
 1452 
 1453                                         vm_object_shadow(&entry->object.vm_object,
 1454                                             &entry->offset,
 1455                                             OFF_TO_IDX(entry->end
 1456                                                 - entry->start));
 1457                                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 1458 
 1459                                 } else if (entry->object.vm_object == NULL) {
 1460 
 1461                                         entry->object.vm_object =
 1462                                             vm_object_allocate(OBJT_DEFAULT,
 1463                                                 OFF_TO_IDX(entry->end - entry->start));
 1464                                         entry->offset = (vm_offset_t) 0;
 1465 
 1466                                 }
 1467                                 default_pager_convert_to_swapq(entry->object.vm_object);
 1468                         }
 1469 
 1470                         vm_map_clip_start(map, entry, start);
 1471                         vm_map_clip_end(map, entry, end);
 1472 
 1473                         entry->wired_count++;
 1474                         entry->eflags |= MAP_ENTRY_USER_WIRED;
 1475 
 1476                         /* First we need to allow map modifications */
 1477                         lock_set_recursive(&map->lock);
 1478                         lock_write_to_read(&map->lock);
 1479 
 1480                         rv = vm_fault_user_wire(map, entry->start, entry->end);
 1481                         if (rv) {
 1482 
 1483                                 entry->wired_count--;
 1484                                 entry->eflags &= ~MAP_ENTRY_USER_WIRED;
 1485 
 1486                                 lock_clear_recursive(&map->lock);
 1487                                 vm_map_unlock(map);
 1488                                 
 1489                                 (void) vm_map_user_pageable(map, start, entry->start, TRUE);
 1490                                 return rv;
 1491                         }
 1492 
 1493                         lock_clear_recursive(&map->lock);
 1494                         lock_read_to_write(&map->lock);
 1495 
 1496                         goto rescan;
 1497                 }
 1498         }
 1499         vm_map_unlock(map);
 1500         return KERN_SUCCESS;
 1501 }
 1502 
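/*
 * Illustrative sketch (editor's addition, not part of the original
 * vm_map.c): how an mlock(2)/munlock(2)-style caller might use
 * vm_map_user_pageable() above.  Passing FALSE for new_pageable wires the
 * range (faulting every page in); passing TRUE unwires it.  The helper
 * name and rounding are assumptions made for illustration only.
 */
static int
example_user_wire(map, addr, len, wire)
        vm_map_t map;
        vm_offset_t addr;
        vm_size_t len;
        boolean_t wire;
{
        vm_offset_t start = trunc_page(addr);
        vm_offset_t end = round_page(addr + len);

        /* new_pageable == TRUE means "make pageable again", i.e. unwire. */
        return (vm_map_user_pageable(map, start, end, wire ? FALSE : TRUE));
}
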
 1503 /*
 1504  *      vm_map_pageable:
 1505  *
 1506  *      Sets the pageability of the specified address
 1507  *      range in the target map.  Regions specified
 1508  *      as not pageable require locked-down physical
 1509  *      memory and physical page maps.
 1510  *
 1511  *      The map must not be locked, but a reference
 1512  *      must remain to the map throughout the call.
 1513  */
 1514 int
 1515 vm_map_pageable(map, start, end, new_pageable)
 1516         register vm_map_t map;
 1517         register vm_offset_t start;
 1518         register vm_offset_t end;
 1519         register boolean_t new_pageable;
 1520 {
 1521         register vm_map_entry_t entry;
 1522         vm_map_entry_t start_entry;
 1523         register vm_offset_t failed = 0;
 1524         int rv;
 1525 
 1526         vm_map_lock(map);
 1527 
 1528         VM_MAP_RANGE_CHECK(map, start, end);
 1529 
 1530         /*
 1531          * Only one pageability change may take place at one time, since
 1532          * vm_fault assumes it will be called only once for each
 1533          * wiring/unwiring.  Therefore, we have to make sure we're actually
 1534          * changing the pageability for the entire region.  We do so before
 1535          * making any changes.
 1536          */
 1537 
 1538         if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
 1539                 vm_map_unlock(map);
 1540                 return (KERN_INVALID_ADDRESS);
 1541         }
 1542         entry = start_entry;
 1543 
 1544         /*
 1545          * Actions are rather different for wiring and unwiring, so we have
 1546          * two separate cases.
 1547          */
 1548 
 1549         if (new_pageable) {
 1550 
 1551                 vm_map_clip_start(map, entry, start);
 1552 
 1553                 /*
 1554                  * Unwiring.  First ensure that the range to be unwired is
 1555                  * really wired down and that there are no holes.
 1556                  */
 1557                 while ((entry != &map->header) && (entry->start < end)) {
 1558 
 1559                         if (entry->wired_count == 0 ||
 1560                             (entry->end < end &&
 1561                                 (entry->next == &map->header ||
 1562                                     entry->next->start > entry->end))) {
 1563                                 vm_map_unlock(map);
 1564                                 return (KERN_INVALID_ARGUMENT);
 1565                         }
 1566                         entry = entry->next;
 1567                 }
 1568 
 1569                 /*
 1570                  * Now decrement the wiring count for each region. If a region
 1571                  * becomes completely unwired, unwire its physical pages and
 1572                  * mappings.
 1573                  */
 1574                 lock_set_recursive(&map->lock);
 1575 
 1576                 entry = start_entry;
 1577                 while ((entry != &map->header) && (entry->start < end)) {
 1578                         vm_map_clip_end(map, entry, end);
 1579 
 1580                         entry->wired_count--;
 1581                         if (entry->wired_count == 0)
 1582                                 vm_fault_unwire(map, entry->start, entry->end);
 1583 
 1584                         entry = entry->next;
 1585                 }
 1586                 vm_map_simplify_entry(map, start_entry);
 1587                 lock_clear_recursive(&map->lock);
 1588         } else {
 1589                 /*
 1590                  * Wiring.  We must do this in two passes:
 1591                  *
 1592                  * 1.  Holding the write lock, we create any shadow or zero-fill
 1593                  * objects that need to be created. Then we clip each map
 1594                  * entry to the region to be wired and increment its wiring
 1595                  * count.  We create objects before clipping the map entries
 1596                  * to avoid object proliferation.
 1597                  *
 1598                  * 2.  We downgrade to a read lock, and call vm_fault_wire to
 1599                  * fault in the pages for any newly wired area (wired_count is
 1600                  * 1).
 1601                  *
 1602                  * Downgrading to a read lock for vm_fault_wire avoids a possible
 1603                  * deadlock with another process that may have faulted on one
 1604                  * of the pages to be wired (it would mark the page busy,
 1605                  * blocking us, then in turn block on the map lock that we
 1606                  * hold).  Because of problems in the recursive lock package,
 1607                  * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
 1608                  * any actions that require the write lock must be done
 1609                  * beforehand.  Because we keep the read lock on the map, the
 1610                  * copy-on-write status of the entries we modify here cannot
 1611                  * change.
 1612                  */
 1613 
 1614                 /*
 1615                  * Pass 1.
 1616                  */
 1617                 while ((entry != &map->header) && (entry->start < end)) {
 1618                         if (entry->wired_count == 0) {
 1619 
 1620                                 /*
 1621                                  * Perform actions of vm_map_lookup that need
 1622                                  * the write lock on the map: create a shadow
 1623                                  * object for a copy-on-write region, or an
 1624                                  * object for a zero-fill region.
 1625                                  *
 1626                                  * We don't have to do this for entries that
 1627                                  * point to sharing maps, because we won't
 1628                                  * hold the lock on the sharing map.
 1629                                  */
 1630                                 if ((entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) == 0) {
 1631                                         int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
 1632                                         if (copyflag &&
 1633                                             ((entry->protection & VM_PROT_WRITE) != 0)) {
 1634 
 1635                                                 vm_object_shadow(&entry->object.vm_object,
 1636                                                     &entry->offset,
 1637                                                     OFF_TO_IDX(entry->end
 1638                                                         - entry->start));
 1639                                                 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 1640                                         } else if (entry->object.vm_object == NULL) {
 1641                                                 entry->object.vm_object =
 1642                                                     vm_object_allocate(OBJT_DEFAULT,
 1643                                                         OFF_TO_IDX(entry->end - entry->start));
 1644                                                 entry->offset = (vm_offset_t) 0;
 1645                                         }
 1646                                         default_pager_convert_to_swapq(entry->object.vm_object);
 1647                                 }
 1648                         }
 1649                         vm_map_clip_start(map, entry, start);
 1650                         vm_map_clip_end(map, entry, end);
 1651                         entry->wired_count++;
 1652 
 1653                         /*
 1654                          * Check for holes
 1655                          */
 1656                         if (entry->end < end &&
 1657                             (entry->next == &map->header ||
 1658                                 entry->next->start > entry->end)) {
 1659                                 /*
 1660                                  * Found one.  Object creation actions do not
 1661                                  * need to be undone, but the wired counts
 1662                                  * need to be restored.
 1663                                  */
 1664                                 while (entry != &map->header && entry->end > start) {
 1665                                         entry->wired_count--;
 1666                                         entry = entry->prev;
 1667                                 }
 1668                                 vm_map_unlock(map);
 1669                                 return (KERN_INVALID_ARGUMENT);
 1670                         }
 1671                         entry = entry->next;
 1672                 }
 1673 
 1674                 /*
 1675                  * Pass 2.
 1676                  */
 1677 
 1678                 /*
 1679                  * HACK HACK HACK HACK
 1680                  *
 1681                  * If we are wiring in the kernel map or a submap of it,
 1682                  * unlock the map to avoid deadlocks.  We trust that the
 1683                  * kernel is well-behaved, and therefore will not do
 1684                  * anything destructive to this region of the map while
 1685                  * we have it unlocked.  We cannot trust user processes
 1686                  * to do the same.
 1687                  *
 1688                  * HACK HACK HACK HACK
 1689                  */
 1690                 if (vm_map_pmap(map) == kernel_pmap) {
 1691                         vm_map_unlock(map);     /* trust me ... */
 1692                 } else {
 1693                         lock_set_recursive(&map->lock);
 1694                         lock_write_to_read(&map->lock);
 1695                 }
 1696 
 1697                 rv = 0;
 1698                 entry = start_entry;
 1699                 while (entry != &map->header && entry->start < end) {
 1700                         /*
 1701                          * If vm_fault_wire fails for any page we need to undo
 1702                          * what has been done.  We decrement the wiring count
 1703                          * for those pages which have not yet been wired (now)
 1704                          * and unwire those that have (later).
 1705                          *
 1706                          * XXX this violates the locking protocol on the map,
 1707                          * needs to be fixed.
 1708                          */
 1709                         if (rv)
 1710                                 entry->wired_count--;
 1711                         else if (entry->wired_count == 1) {
 1712                                 rv = vm_fault_wire(map, entry->start, entry->end);
 1713                                 if (rv) {
 1714                                         failed = entry->start;
 1715                                         entry->wired_count--;
 1716                                 }
 1717                         }
 1718                         entry = entry->next;
 1719                 }
 1720 
 1721                 if (vm_map_pmap(map) == kernel_pmap) {
 1722                         vm_map_lock(map);
 1723                 } else {
 1724                         lock_clear_recursive(&map->lock);
 1725                 }
 1726                 if (rv) {
 1727                         vm_map_unlock(map);
 1728                         (void) vm_map_pageable(map, start, failed, TRUE);
 1729                         return (rv);
 1730                 }
 1731                 vm_map_simplify_entry(map, start_entry);
 1732         }
 1733 
 1734         vm_map_unlock(map);
 1735 
 1736         return (KERN_SUCCESS);
 1737 }
 1738 
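/*
 * Illustrative sketch (editor's addition, not part of the original
 * vm_map.c): wiring a range on behalf of the kernel for the duration of an
 * I/O operation, in the spirit of vslock()/vsunlock(), using
 * vm_map_pageable() above.  The helper name is an assumption made for
 * illustration only.
 */
static void
example_kernel_wire_buffer(map, addr, len)
        vm_map_t map;
        vm_offset_t addr;
        vm_size_t len;
{
        vm_offset_t start = trunc_page(addr);
        vm_offset_t end = round_page(addr + len);

        /* FALSE: not pageable, i.e. wire the pages down. */
        if (vm_map_pageable(map, start, end, FALSE) != KERN_SUCCESS)
                return;

        /* ... the wired range would be used for I/O here ... */

        /* TRUE: pageable again, i.e. unwire. */
        (void) vm_map_pageable(map, start, end, TRUE);
}
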
 1739 /*
 1740  * vm_map_clean
 1741  *
 1742  * Push any dirty cached pages in the address range to their pager.
 1743  * If syncio is TRUE, dirty pages are written synchronously.
 1744  * If invalidate is TRUE, any cached pages are freed as well.
 1745  *
 1746  * Returns an error if any part of the specified range is not mapped.
 1747  */
 1748 int
 1749 vm_map_clean(map, start, end, syncio, invalidate)
 1750         vm_map_t map;
 1751         vm_offset_t start;
 1752         vm_offset_t end;
 1753         boolean_t syncio;
 1754         boolean_t invalidate;
 1755 {
 1756         register vm_map_entry_t current;
 1757         vm_map_entry_t entry;
 1758         vm_size_t size;
 1759         vm_object_t object;
 1760         vm_ooffset_t offset;
 1761 
 1762         vm_map_lock_read(map);
 1763         VM_MAP_RANGE_CHECK(map, start, end);
 1764         if (!vm_map_lookup_entry(map, start, &entry)) {
 1765                 vm_map_unlock_read(map);
 1766                 return (KERN_INVALID_ADDRESS);
 1767         }
 1768         /*
 1769          * Make a first pass to check for holes.
 1770          */
 1771         for (current = entry; current->start < end; current = current->next) {
 1772                 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
 1773                         vm_map_unlock_read(map);
 1774                         return (KERN_INVALID_ARGUMENT);
 1775                 }
 1776                 if (end > current->end &&
 1777                     (current->next == &map->header ||
 1778                         current->end != current->next->start)) {
 1779                         vm_map_unlock_read(map);
 1780                         return (KERN_INVALID_ADDRESS);
 1781                 }
 1782         }
 1783 
 1784         /*
 1785          * Make a second pass, cleaning/uncaching pages from the indicated
 1786          * objects as we go.
 1787          */
 1788         for (current = entry; current->start < end; current = current->next) {
 1789                 offset = current->offset + (start - current->start);
 1790                 size = (end <= current->end ? end : current->end) - start;
 1791                 if (current->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) {
 1792                         register vm_map_t smap;
 1793                         vm_map_entry_t tentry;
 1794                         vm_size_t tsize;
 1795 
 1796                         smap = current->object.share_map;
 1797                         vm_map_lock_read(smap);
 1798                         (void) vm_map_lookup_entry(smap, offset, &tentry);
 1799                         tsize = tentry->end - offset;
 1800                         if (tsize < size)
 1801                                 size = tsize;
 1802                         object = tentry->object.vm_object;
 1803                         offset = tentry->offset + (offset - tentry->start);
 1804                         vm_map_unlock_read(smap);
 1805                 } else {
 1806                         object = current->object.vm_object;
 1807                 }
 1808                 /*
 1809                  * Note that there is absolutely no sense in writing out
 1810                  * anonymous objects, so we track down the vnode object
 1811                  * to write out.
 1812                  * We invalidate (remove) all pages from the address space
 1813                  * anyway, for semantic correctness.
 1814                  */
 1815                 while (object->backing_object) {
 1816                         object = object->backing_object;
 1817                         offset += object->backing_object_offset;
 1818                         if (object->size < OFF_TO_IDX( offset + size))
 1819                                 size = IDX_TO_OFF(object->size) - offset;
 1820                 }
 1821                 if (invalidate)
 1822                         pmap_remove(vm_map_pmap(map), current->start,
 1823                                 current->start + size);
 1824                 if (object && (object->type == OBJT_VNODE)) {
 1825                         /*
 1826                          * Flush pages if writing is allowed. XXX should we continue
 1827                          * on an error?
 1828                          *
 1829                          * XXX Doing async I/O and then removing all the pages from
 1830                          *     the object before it completes is probably a very bad
 1831                          *     idea.
 1832                          */
 1833                         if (current->protection & VM_PROT_WRITE) {
 1834                                 vm_object_page_clean(object,
 1835                                         OFF_TO_IDX(offset),
 1836                                         OFF_TO_IDX(offset + size),
 1837                                         (syncio||invalidate)?1:0, TRUE);
 1838                                 if (invalidate)
 1839                                         vm_object_page_remove(object,
 1840                                                 OFF_TO_IDX(offset),
 1841                                                 OFF_TO_IDX(offset + size),
 1842                                                 FALSE);
 1843                         }
 1844                 }
 1845                 start += size;
 1846         }
 1847 
 1848         vm_map_unlock_read(map);
 1849         return (KERN_SUCCESS);
 1850 }
 1851 
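/*
 * Illustrative sketch (editor's addition, not part of the original
 * vm_map.c): how an msync(2)-style caller might map its flags onto
 * vm_map_clean()'s syncio/invalidate booleans.  The helper name and
 * parameter names are assumptions made for illustration only.
 */
static int
example_sync_range(map, addr, len, synchronous, invalidate)
        vm_map_t map;
        vm_offset_t addr;
        vm_size_t len;
        boolean_t synchronous;  /* MS_SYNC-like behaviour */
        boolean_t invalidate;   /* MS_INVALIDATE-like behaviour */
{
        return (vm_map_clean(map, trunc_page(addr), round_page(addr + len),
            synchronous, invalidate));
}
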
 1852 /*
 1853  *      vm_map_entry_unwire:    [ internal use only ]
 1854  *
 1855  *      Make the region specified by this entry pageable.
 1856  *
 1857  *      The map in question should be locked.
 1858  *      [This is the reason for this routine's existence.]
 1859  */
 1860 static void 
 1861 vm_map_entry_unwire(map, entry)
 1862         vm_map_t map;
 1863         register vm_map_entry_t entry;
 1864 {
 1865         vm_fault_unwire(map, entry->start, entry->end);
 1866         entry->wired_count = 0;
 1867 }
 1868 
 1869 /*
 1870  *      vm_map_entry_delete:    [ internal use only ]
 1871  *
 1872  *      Deallocate the given entry from the target map.
 1873  */
 1874 static void
 1875 vm_map_entry_delete(map, entry)
 1876         register vm_map_t map;
 1877         register vm_map_entry_t entry;
 1878 {
 1879         vm_map_entry_unlink(map, entry);
 1880         map->size -= entry->end - entry->start;
 1881 
 1882         if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) {
 1883                 vm_map_deallocate(entry->object.share_map);
 1884         } else {
 1885                 vm_object_deallocate(entry->object.vm_object);
 1886         }
 1887 
 1888         vm_map_entry_dispose(map, entry);
 1889 }
 1890 
 1891 /*
 1892  *      vm_map_delete:  [ internal use only ]
 1893  *
 1894  *      Deallocates the given address range from the target
 1895  *      map.
 1896  *
 1897  *      When called with a sharing map, removes pages from
 1898  *      that region from all physical maps.
 1899  */
 1900 int
 1901 vm_map_delete(map, start, end)
 1902         register vm_map_t map;
 1903         vm_offset_t start;
 1904         register vm_offset_t end;
 1905 {
 1906         register vm_map_entry_t entry;
 1907         vm_map_entry_t first_entry;
 1908 
 1909         /*
 1910          * Find the start of the region, and clip it
 1911          */
 1912 
 1913         if (!vm_map_lookup_entry(map, start, &first_entry))
 1914                 entry = first_entry->next;
 1915         else {
 1916                 entry = first_entry;
 1917                 vm_map_clip_start(map, entry, start);
 1918 
 1919                 /*
 1920                  * Fix the lookup hint now, rather than each time through
 1921                  * the loop.
 1922                  */
 1923 
 1924                 SAVE_HINT(map, entry->prev);
 1925         }
 1926 
 1927         /*
 1928          * Save the free space hint
 1929          */
 1930 
 1931         if (entry == &map->header) {
 1932                 map->first_free = &map->header;
 1933         } else if (map->first_free->start >= start)
 1934                 map->first_free = entry->prev;
 1935 
 1936         /*
 1937          * Step through all entries in this region
 1938          */
 1939 
 1940         while ((entry != &map->header) && (entry->start < end)) {
 1941                 vm_map_entry_t next;
 1942                 vm_offset_t s, e;
 1943                 vm_object_t object;
 1944                 vm_ooffset_t offset;
 1945 
 1946                 vm_map_clip_end(map, entry, end);
 1947 
 1948                 next = entry->next;
 1949                 s = entry->start;
 1950                 e = entry->end;
 1951                 offset = entry->offset;
 1952 
 1953                 /*
 1954                  * Unwire before removing addresses from the pmap; otherwise,
 1955                  * unwiring will put the entries back in the pmap.
 1956                  */
 1957 
 1958                 object = entry->object.vm_object;
 1959                 if (entry->wired_count != 0)
 1960                         vm_map_entry_unwire(map, entry);
 1961 
 1962                 /*
 1963                  * If this is a sharing map, we must remove *all* references
 1964                  * to this data, since we can't find all of the physical maps
 1965                  * which are sharing it.
 1966                  */
 1967 
 1968                 if (object == kernel_object || object == kmem_object) {
 1969                         vm_object_page_remove(object, OFF_TO_IDX(offset),
 1970                             OFF_TO_IDX(offset + (e - s)), FALSE);
 1971                 } else if (!map->is_main_map) {
 1972                         vm_object_pmap_remove(object,
 1973                             OFF_TO_IDX(offset),
 1974                             OFF_TO_IDX(offset + (e - s)));
 1975                 } else {
 1976                         pmap_remove(map->pmap, s, e);
 1977                 }
 1978 
 1979                 /*
 1980                  * Delete the entry (which may delete the object) only after
 1981                  * removing all pmap entries pointing to its pages.
 1982                  * (Otherwise, its page frames may be reallocated, and any
 1983                  * modify bits will be set in the wrong object!)
 1984                  */
 1985 
 1986                 vm_map_entry_delete(map, entry);
 1987                 entry = next;
 1988         }
 1989         return (KERN_SUCCESS);
 1990 }
 1991 
 1992 /*
 1993  *      vm_map_remove:
 1994  *
 1995  *      Remove the given address range from the target map.
 1996  *      This is the exported form of vm_map_delete.
 1997  */
 1998 int
 1999 vm_map_remove(map, start, end)
 2000         register vm_map_t map;
 2001         register vm_offset_t start;
 2002         register vm_offset_t end;
 2003 {
 2004         register int result, s = 0;
 2005 
 2006         if (map == kmem_map || map == mb_map)
 2007                 s = splvm();
 2008 
 2009         vm_map_lock(map);
 2010         VM_MAP_RANGE_CHECK(map, start, end);
 2011         result = vm_map_delete(map, start, end);
 2012         vm_map_unlock(map);
 2013 
 2014         if (map == kmem_map || map == mb_map)
 2015                 splx(s);
 2016 
 2017         return (result);
 2018 }
 2019 
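/*
 * Illustrative sketch (editor's addition, not part of the original
 * vm_map.c): a munmap(2)-style caller of vm_map_remove(), the exported
 * entry point above.  The helper name and rounding are assumptions made
 * for illustration only.
 */
static int
example_unmap_range(map, addr, len)
        vm_map_t map;
        vm_offset_t addr;
        vm_size_t len;
{
        return (vm_map_remove(map, trunc_page(addr), round_page(addr + len)));
}
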
 2020 /*
 2021  *      vm_map_check_protection:
 2022  *
 2023  *      Assert that the target map allows the specified
 2024  *      privilege on the entire address region given.
 2025  *      The entire region must be allocated.
 2026  */
 2027 boolean_t
 2028 vm_map_check_protection(map, start, end, protection)
 2029         register vm_map_t map;
 2030         register vm_offset_t start;
 2031         register vm_offset_t end;
 2032         register vm_prot_t protection;
 2033 {
 2034         register vm_map_entry_t entry;
 2035         vm_map_entry_t tmp_entry;
 2036 
 2037         if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
 2038                 return (FALSE);
 2039         }
 2040         entry = tmp_entry;
 2041 
 2042         while (start < end) {
 2043                 if (entry == &map->header) {
 2044                         return (FALSE);
 2045                 }
 2046                 /*
 2047                  * No holes allowed!
 2048                  */
 2049 
 2050                 if (start < entry->start) {
 2051                         return (FALSE);
 2052                 }
 2053                 /*
 2054                  * Check protection associated with entry.
 2055                  */
 2056 
 2057                 if ((entry->protection & protection) != protection) {
 2058                         return (FALSE);
 2059                 }
 2060                 /* go to next entry */
 2061 
 2062                 start = entry->end;
 2063                 entry = entry->next;
 2064         }
 2065         return (TRUE);
 2066 }
 2067 
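/*
 * Illustrative sketch (editor's addition, not part of the original
 * vm_map.c): using vm_map_check_protection() above to verify that a user
 * buffer is both readable and writable before it is touched.  As in the
 * routine above, map locking is left to the caller.  The helper name is an
 * assumption made for illustration only.
 */
static boolean_t
example_range_is_rw(map, addr, len)
        vm_map_t map;
        vm_offset_t addr;
        vm_size_t len;
{
        return (vm_map_check_protection(map, trunc_page(addr),
            round_page(addr + len), VM_PROT_READ | VM_PROT_WRITE));
}
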
 2068 /*
 2069  *      vm_map_copy_entry:
 2070  *
 2071  *      Copies the contents of the source entry to the destination
 2072  *      entry.  The entries *must* be aligned properly.
 2073  */
 2074 static void
 2075 vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
 2076         vm_map_t src_map, dst_map;
 2077         register vm_map_entry_t src_entry, dst_entry;
 2078 {
 2079         if ((dst_entry->eflags|src_entry->eflags) &
 2080                 (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP))
 2081                 return;
 2082 
 2083         if (src_entry->wired_count == 0) {
 2084 
 2085                 /*
 2086                  * If the source entry is marked needs_copy, it is already
 2087                  * write-protected.
 2088                  */
 2089                 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {
 2090 
 2091                         boolean_t su;
 2092 
 2093                         /*
 2094                          * If the source entry has only one mapping, we can
 2095                          * just protect the virtual address range.
 2096                          */
 2097                         if (!(su = src_map->is_main_map)) {
 2098                                 su = (src_map->ref_count == 1);
 2099                         }
 2100                         if (su) {
 2101                                 pmap_protect(src_map->pmap,
 2102                                     src_entry->start,
 2103                                     src_entry->end,
 2104                                     src_entry->protection & ~VM_PROT_WRITE);
 2105                         } else {
 2106                                 vm_object_pmap_copy(src_entry->object.vm_object,
 2107                                     OFF_TO_IDX(src_entry->offset),
 2108                                     OFF_TO_IDX(src_entry->offset + (src_entry->end
 2109                                         - src_entry->start)));
 2110                         }
 2111                 }
 2112 
 2113                 /*
 2114                  * Make a copy of the object.
 2115                  */
 2116                 if (src_entry->object.vm_object) {
 2117                         if ((src_entry->object.vm_object->handle == NULL) &&
 2118                                 (src_entry->object.vm_object->type == OBJT_DEFAULT ||
 2119                                  src_entry->object.vm_object->type == OBJT_SWAP))
 2120                                 vm_object_collapse(src_entry->object.vm_object);
 2121                         ++src_entry->object.vm_object->ref_count;
 2122                         src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
 2123                         dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
 2124                         dst_entry->object.vm_object =
 2125                                 src_entry->object.vm_object;
 2126                         dst_entry->offset = src_entry->offset;
 2127                 } else {
 2128                         dst_entry->object.vm_object = NULL;
 2129                         dst_entry->offset = 0;
 2130                 }
 2131 
 2132                 pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
 2133                     dst_entry->end - dst_entry->start, src_entry->start);
 2134         } else {
 2135                 /*
 2136                  * Of course, wired down pages can't be set copy-on-write.
 2137                  * Cause wired pages to be copied into the new map by
 2138                  * simulating faults (the new pages are pageable)
 2139                  */
 2140                 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
 2141         }
 2142 }
 2143 
 2144 /*
 2145  * vmspace_fork:
 2146  * Create a new process vmspace structure and vm_map
 2147  * based on those of an existing process.  The new map
 2148  * is based on the old map, according to the inheritance
 2149  * values on the regions in that map.
 2150  *
 2151  * The source map must not be locked.
 2152  */
 2153 struct vmspace *
 2154 vmspace_fork(vm1)
 2155         register struct vmspace *vm1;
 2156 {
 2157         register struct vmspace *vm2;
 2158         vm_map_t old_map = &vm1->vm_map;
 2159         vm_map_t new_map;
 2160         vm_map_entry_t old_entry;
 2161         vm_map_entry_t new_entry;
 2162         pmap_t new_pmap;
 2163         vm_object_t object;
 2164 
 2165         vm_map_lock(old_map);
 2166 
 2167         vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset,
 2168             old_map->entries_pageable);
 2169         bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
 2170             (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
 2171         new_pmap = &vm2->vm_pmap;       /* XXX */
 2172         new_map = &vm2->vm_map; /* XXX */
 2173 
 2174         old_entry = old_map->header.next;
 2175 
 2176         while (old_entry != &old_map->header) {
 2177                 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 2178                         panic("vm_map_fork: encountered a submap");
 2179 
 2180                 switch (old_entry->inheritance) {
 2181                 case VM_INHERIT_NONE:
 2182                         break;
 2183 
 2184                 case VM_INHERIT_SHARE:
 2185                         /*
 2186                          * Clone the entry, creating the shared object if necessary.
 2187                          */
 2188                         object = old_entry->object.vm_object;
 2189                         if (object == NULL) {
 2190                                 object = vm_object_allocate(OBJT_DEFAULT,
 2191                                                             OFF_TO_IDX(old_entry->end -
 2192                                                                        old_entry->start));
 2193                                 old_entry->object.vm_object = object;
 2194                                 old_entry->offset = (vm_offset_t) 0;
 2195                         } else if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
 2196                                 vm_object_shadow(&old_entry->object.vm_object,
 2197                                                  &old_entry->offset,
 2198                                                  OFF_TO_IDX(old_entry->end -
 2199                                                         old_entry->start));
 2200 
 2201                                 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 2202                                 object = old_entry->object.vm_object;
 2203                         }
 2204 
 2205                         /*
 2206                          * Clone the entry, taking a reference on the shared object.
 2207                          */
 2208                         new_entry = vm_map_entry_create(new_map);
 2209                         *new_entry = *old_entry;
 2210                         new_entry->wired_count = 0;
 2211                         ++object->ref_count;
 2212 
 2213                         /*
 2214                          * Insert the entry into the new map -- we know we're
 2215                          * inserting at the end of the new map.
 2216                          */
 2217 
 2218                         vm_map_entry_link(new_map, new_map->header.prev,
 2219                             new_entry);
 2220 
 2221                         /*
 2222                          * Update the physical map
 2223                          */
 2224 
 2225                         pmap_copy(new_map->pmap, old_map->pmap,
 2226                             new_entry->start,
 2227                             (old_entry->end - old_entry->start),
 2228                             old_entry->start);
 2229                         break;
 2230 
 2231                 case VM_INHERIT_COPY:
 2232                         /*
 2233                          * Clone the entry and link into the map.
 2234                          */
 2235                         new_entry = vm_map_entry_create(new_map);
 2236                         *new_entry = *old_entry;
 2237                         new_entry->wired_count = 0;
 2238                         new_entry->object.vm_object = NULL;
 2239                         new_entry->eflags &= ~MAP_ENTRY_IS_A_MAP;
 2240                         vm_map_entry_link(new_map, new_map->header.prev,
 2241                             new_entry);
 2242                         vm_map_copy_entry(old_map, new_map, old_entry,
 2243                             new_entry);
 2244                         break;
 2245                 }
 2246                 old_entry = old_entry->next;
 2247         }
 2248 
 2249         new_map->size = old_map->size;
 2250         vm_map_unlock(old_map);
 2251 
 2252         return (vm2);
 2253 }
 2254 
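/*
 * Illustrative sketch (editor's addition, not part of the original
 * vm_map.c): a fork(2)-style path handing the child a vmspace derived from
 * the parent's via vmspace_fork() above, honoring the per-entry
 * inheritance values set with vm_map_inherit().  The helper name is an
 * assumption made for illustration only.
 */
static void
example_fork_vmspace(parent_vm, child_vmp)
        struct vmspace *parent_vm;
        struct vmspace **child_vmp;
{
        /* The source map must not be locked by the caller. */
        *child_vmp = vmspace_fork(parent_vm);
}
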
 2255 /*
 2256  *      vm_map_lookup:
 2257  *
 2258  *      Finds the VM object, offset, and
 2259  *      protection for a given virtual address in the
 2260  *      specified map, assuming a page fault of the
 2261  *      type specified.
 2262  *
 2263  *      Leaves the map in question locked for read; return
 2264  *      values are guaranteed until a vm_map_lookup_done
 2265  *      call is performed.  Note that the map argument
 2266  *      is in/out; the returned map must be used in
 2267  *      the call to vm_map_lookup_done.
 2268  *
 2269  *      A handle (out_entry) is returned for use in
 2270  *      vm_map_lookup_done, to make that fast.
 2271  *
 2272  *      If a lookup is requested with "write protection"
 2273  *      specified, the map may be changed to perform virtual
 2274  *      copying operations, although the data referenced will
 2275  *      remain the same.
 2276  */
 2277 int
 2278 vm_map_lookup(var_map, vaddr, fault_type, out_entry,
 2279     object, pindex, out_prot, wired, single_use)
 2280         vm_map_t *var_map;      /* IN/OUT */
 2281         register vm_offset_t vaddr;
 2282         register vm_prot_t fault_type;
 2283 
 2284         vm_map_entry_t *out_entry;      /* OUT */
 2285         vm_object_t *object;    /* OUT */
 2286         vm_pindex_t *pindex;    /* OUT */
 2287         vm_prot_t *out_prot;    /* OUT */
 2288         boolean_t *wired;       /* OUT */
 2289         boolean_t *single_use;  /* OUT */
 2290 {
 2291         vm_map_t share_map;
 2292         vm_offset_t share_offset;
 2293         register vm_map_entry_t entry;
 2294         register vm_map_t map = *var_map;
 2295         register vm_prot_t prot;
 2296         register boolean_t su;
 2297 
 2298 RetryLookup:;
 2299 
 2300         /*
 2301          * Lookup the faulting address.
 2302          */
 2303 
 2304         vm_map_lock_read(map);
 2305 
 2306 #define RETURN(why) \
 2307                 { \
 2308                 vm_map_unlock_read(map); \
 2309                 return(why); \
 2310                 }
 2311 
 2312         /*
 2313          * If the map has an interesting hint, try it before calling full
 2314          * blown lookup routine.
 2315          */
 2316 
 2317         entry = map->hint;
 2318 
 2319         *out_entry = entry;
 2320 
 2321         if ((entry == &map->header) ||
 2322             (vaddr < entry->start) || (vaddr >= entry->end)) {
 2323                 vm_map_entry_t tmp_entry;
 2324 
 2325                 /*
 2326                  * Entry was either not a valid hint, or the vaddr was not
 2327                  * contained in the entry, so do a full lookup.
 2328                  */
 2329                 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
 2330                         RETURN(KERN_INVALID_ADDRESS);
 2331 
 2332                 entry = tmp_entry;
 2333                 *out_entry = entry;
 2334         }
 2335         
 2336         /*
 2337          * Handle submaps.
 2338          */
 2339 
 2340         if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
 2341                 vm_map_t old_map = map;
 2342 
 2343                 *var_map = map = entry->object.sub_map;
 2344                 vm_map_unlock_read(old_map);
 2345                 goto RetryLookup;
 2346         }
 2347 
 2348         /*
 2349          * Check whether this task is allowed to have this page.
 2350          * Note the special case for MAP_ENTRY_COW
 2351          * pages with an override.  This is to implement a forced
 2352          * COW for debuggers.
 2353          */
 2354 
 2355         prot = entry->protection;
 2356         if ((fault_type & VM_PROT_OVERRIDE_WRITE) == 0 ||
 2357                 (entry->eflags & MAP_ENTRY_COW) == 0 ||
 2358                 (entry->wired_count != 0)) {
 2359                 if ((fault_type & (prot)) !=
 2360                     (fault_type & ~VM_PROT_OVERRIDE_WRITE))
 2361                         RETURN(KERN_PROTECTION_FAILURE);
 2362         }
 2363 
 2364         /*
 2365          * If this page is not pageable, we have to get it for all possible
 2366          * accesses.
 2367          */
 2368 
 2369         *wired = (entry->wired_count != 0);
 2370         if (*wired)
 2371                 prot = fault_type = entry->protection;
 2372 
 2373         /*
 2374          * If we don't already have a VM object, track it down.
 2375          */
 2376 
 2377         su = (entry->eflags & MAP_ENTRY_IS_A_MAP) == 0;
 2378         if (su) {
 2379                 share_map = map;
 2380                 share_offset = vaddr;
 2381         } else {
 2382                 vm_map_entry_t share_entry;
 2383 
 2384                 /*
 2385                  * Compute the sharing map, and offset into it.
 2386                  */
 2387 
 2388                 share_map = entry->object.share_map;
 2389                 share_offset = (vaddr - entry->start) + entry->offset;
 2390 
 2391                 /*
 2392                  * Look for the backing store object and offset
 2393                  */
 2394 
 2395                 vm_map_lock_read(share_map);
 2396 
 2397                 if (!vm_map_lookup_entry(share_map, share_offset,
 2398                         &share_entry)) {
 2399                         vm_map_unlock_read(share_map);
 2400                         RETURN(KERN_INVALID_ADDRESS);
 2401                 }
 2402                 entry = share_entry;
 2403         }
 2404 
 2405         /*
 2406          * If the entry was copy-on-write, we either ...
 2407          */
 2408 
 2409         if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
 2410                 /*
 2411                  * If we want to write the page, we may as well handle that
 2412                  * now since we've got the sharing map locked.
 2413                  *
 2414                  * If we don't need to write the page, we just demote the
 2415                  * permissions allowed.
 2416                  */
 2417 
 2418                 if (fault_type & VM_PROT_WRITE) {
 2419                         /*
 2420                          * Make a new object, and place it in the object
 2421                          * chain.  Note that no new references have appeared
 2422                          * -- one just moved from the share map to the new
 2423                          * object.
 2424                          */
 2425 
 2426                         if (lock_read_to_write(&share_map->lock)) {
 2427                                 if (share_map != map)
 2428                                         vm_map_unlock_read(map);
 2429                                 goto RetryLookup;
 2430                         }
 2431                         vm_object_shadow(
 2432                             &entry->object.vm_object,
 2433                             &entry->offset,
 2434                             OFF_TO_IDX(entry->end - entry->start));
 2435 
 2436                         entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
 2437 
 2438                         lock_write_to_read(&share_map->lock);
 2439                 } else {
 2440                         /*
 2441                          * We're attempting to read a copy-on-write page --
 2442                          * don't allow writes.
 2443                          */
 2444 
 2445                         prot &= (~VM_PROT_WRITE);
 2446                 }
 2447         }
 2448         /*
 2449          * Create an object if necessary.
 2450          */
 2451         if (entry->object.vm_object == NULL) {
 2452 
 2453                 if (lock_read_to_write(&share_map->lock)) {
 2454                         if (share_map != map)
 2455                                 vm_map_unlock_read(map);
 2456                         goto RetryLookup;
 2457                 }
 2458                 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
 2459                     OFF_TO_IDX(entry->end - entry->start));
 2460                 entry->offset = 0;
 2461                 lock_write_to_read(&share_map->lock);
 2462         }
 2463 
 2464         if (entry->object.vm_object != NULL)
 2465                 default_pager_convert_to_swapq(entry->object.vm_object);
 2466         /*
 2467          * Return the object/offset from this entry.  If the entry was
 2468          * copy-on-write or empty, it has been fixed up.
 2469          */
 2470 
 2471         *pindex = OFF_TO_IDX((share_offset - entry->start) + entry->offset);
 2472         *object = entry->object.vm_object;
 2473 
 2474         /*
 2475          * Return whether this is the only map sharing this data.
 2476          */
 2477 
 2478         if (!su) {
 2479                 su = (share_map->ref_count == 1);
 2480         }
 2481         *out_prot = prot;
 2482         *single_use = su;
 2483 
 2484         return (KERN_SUCCESS);
 2485 
 2486 #undef  RETURN
 2487 }
 2488 
 2489 /*
 2490  *      vm_map_lookup_done:
 2491  *
 2492  *      Releases locks acquired by a vm_map_lookup
 2493  *      (according to the handle returned by that lookup).
 2494  */
 2495 
 2496 void
 2497 vm_map_lookup_done(map, entry)
 2498         register vm_map_t map;
 2499         vm_map_entry_t entry;
 2500 {
 2501         /*
 2502          * If this entry references a map, unlock it first.
 2503          */
 2504 
 2505         if (entry->eflags & MAP_ENTRY_IS_A_MAP)
 2506                 vm_map_unlock_read(entry->object.share_map);
 2507 
 2508         /*
 2509          * Unlock the main-level map
 2510          */
 2511 
 2512         vm_map_unlock_read(map);
 2513 }
 2514 
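/*
 * Illustrative sketch (editor's addition, not part of the original
 * vm_map.c): the vm_map_lookup()/vm_map_lookup_done() pairing as a
 * fault-handler-style caller might use it.  The returned object and pindex
 * are only valid until vm_map_lookup_done() is called on the (possibly
 * updated) map.  The helper name is an assumption made for illustration
 * only.
 */
static int
example_resolve_fault_address(map, vaddr, fault_type)
        vm_map_t map;
        vm_offset_t vaddr;
        vm_prot_t fault_type;
{
        vm_map_entry_t entry;
        vm_object_t object;
        vm_pindex_t pindex;
        vm_prot_t prot;
        boolean_t wired, single_use;
        int result;

        result = vm_map_lookup(&map, vaddr, fault_type, &entry,
            &object, &pindex, &prot, &wired, &single_use);
        if (result != KERN_SUCCESS)
                return (result);

        /* ... a real caller would fault the page from (object, pindex) ... */

        /* Drop the read lock(s) taken by vm_map_lookup(). */
        vm_map_lookup_done(map, entry);
        return (KERN_SUCCESS);
}
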
 2515 #include "opt_ddb.h"
 2516 #ifdef DDB
 2517 #include <sys/kernel.h>
 2518 
 2519 #include <ddb/ddb.h>
 2520 
 2521 /*
 2522  *      vm_map_print:   [ debug ]
 2523  */
 2524 DB_SHOW_COMMAND(map, vm_map_print)
 2525 {
 2526         /* XXX convert args. */
 2527         register vm_map_t map = (vm_map_t)addr;
 2528         boolean_t full = have_addr;
 2529 
 2530         register vm_map_entry_t entry;
 2531 
 2532         db_iprintf("%s map 0x%x: pmap=0x%x,ref=%d,nentries=%d,version=%d\n",
 2533             (map->is_main_map ? "Task" : "Share"),
 2534             (int) map, (int) (map->pmap), map->ref_count, map->nentries,
 2535             map->timestamp);
 2536 
 2537         if (!full && db_indent)
 2538                 return;
 2539 
 2540         db_indent += 2;
 2541         for (entry = map->header.next; entry != &map->header;
 2542             entry = entry->next) {
 2543                 db_iprintf("map entry 0x%x: start=0x%x, end=0x%x, ",
 2544                     (int) entry, (int) entry->start, (int) entry->end);
 2545                 if (map->is_main_map) {
 2546                         static char *inheritance_name[4] =
 2547                         {"share", "copy", "none", "donate_copy"};
 2548 
 2549                         db_printf("prot=%x/%x/%s, ",
 2550                             entry->protection,
 2551                             entry->max_protection,
 2552                             inheritance_name[entry->inheritance]);
 2553                         if (entry->wired_count != 0)
 2554                                 db_printf("wired, ");
 2555                 }
 2556                 if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) {
 2557                         db_printf("share=0x%x, offset=0x%x\n",
 2558                             (int) entry->object.share_map,
 2559                             (int) entry->offset);
 2560                         if ((entry->prev == &map->header) ||
 2561                             ((entry->prev->eflags & MAP_ENTRY_IS_A_MAP) == 0) ||
 2562                             (entry->prev->object.share_map !=
 2563                                 entry->object.share_map)) {
 2564                                 db_indent += 2;
 2565                                 vm_map_print((int)entry->object.share_map,
 2566                                              full, 0, (char *)0);
 2567                                 db_indent -= 2;
 2568                         }
 2569                 } else {
 2570                         db_printf("object=0x%x, offset=0x%x",
 2571                             (int) entry->object.vm_object,
 2572                             (int) entry->offset);
 2573                         if (entry->eflags & MAP_ENTRY_COW)
 2574                                 db_printf(", copy (%s)",
 2575                                     (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
 2576                         db_printf("\n");
 2577 
 2578                         if ((entry->prev == &map->header) ||
 2579                             (entry->prev->eflags & MAP_ENTRY_IS_A_MAP) ||
 2580                             (entry->prev->object.vm_object !=
 2581                                 entry->object.vm_object)) {
 2582                                 db_indent += 2;
 2583                                 vm_object_print((int)entry->object.vm_object,
 2584                                                 full, 0, (char *)0);
 2585                                 db_indent -= 2;
 2586                         }
 2587                 }
 2588         }
 2589         db_indent -= 2;
 2590 }
 2591 #endif /* DDB */
