FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_aobj.c


    1 /*      $OpenBSD: uvm_aobj.c,v 1.107 2022/08/29 02:58:13 jsg Exp $      */
    2 /*      $NetBSD: uvm_aobj.c,v 1.39 2001/02/18 21:19:08 chs Exp $        */
    3 
    4 /*
    5  * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
    6  *                    Washington University.
    7  * All rights reserved.
    8  *
    9  * Redistribution and use in source and binary forms, with or without
   10  * modification, are permitted provided that the following conditions
   11  * are met:
   12  * 1. Redistributions of source code must retain the above copyright
   13  *    notice, this list of conditions and the following disclaimer.
   14  * 2. Redistributions in binary form must reproduce the above copyright
   15  *    notice, this list of conditions and the following disclaimer in the
   16  *    documentation and/or other materials provided with the distribution.
   17  *
   18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   28  *
   29  * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
   30  */
   31 /*
   32  * uvm_aobj.c: anonymous memory uvm_object pager
   33  *
   34  * author: Chuck Silvers <chuq@chuq.com>
   35  * started: Jan-1998
   36  *
   37  * - design mostly from Chuck Cranor
   38  */
   39 
   40 #include <sys/param.h>
   41 #include <sys/systm.h>
   42 #include <sys/malloc.h>
   43 #include <sys/kernel.h>
   44 #include <sys/pool.h>
   45 #include <sys/stdint.h>
   46 #include <sys/atomic.h>
   47 
   48 #include <uvm/uvm.h>
   49 
   50 /*
   51  * An anonymous UVM object (aobj) manages anonymous memory.  In addition to
   52  * keeping the list of resident pages, it may also keep a list of allocated
   53  * swap blocks.  Depending on the size of the object, this list is either
   54  * stored in an array (small objects) or in a hash table (large objects).
   55  */
   56 
   57 /*
   58  * Note: for hash tables, we break the address space of the aobj into blocks
   59  * of UAO_SWHASH_CLUSTER_SIZE pages, which shall be a power of two.
   60  */
   61 #define UAO_SWHASH_CLUSTER_SHIFT        4
   62 #define UAO_SWHASH_CLUSTER_SIZE         (1 << UAO_SWHASH_CLUSTER_SHIFT)
   63 
   64 /* Get the "tag" for this page index. */
   65 #define UAO_SWHASH_ELT_TAG(idx)         ((idx) >> UAO_SWHASH_CLUSTER_SHIFT)
   66 #define UAO_SWHASH_ELT_PAGESLOT_IDX(idx) \
   67     ((idx) & (UAO_SWHASH_CLUSTER_SIZE - 1))
   68 
   69 /* Given an ELT and a page index, find the swap slot. */
   70 #define UAO_SWHASH_ELT_PAGESLOT(elt, idx) \
   71     ((elt)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(idx)])
   72 
   73 /* Given an ELT, return its pageidx base. */
   74 #define UAO_SWHASH_ELT_PAGEIDX_BASE(elt) \
   75     ((elt)->tag << UAO_SWHASH_CLUSTER_SHIFT)
   76 
   77 /* The hash function. */
   78 #define UAO_SWHASH_HASH(aobj, idx) \
   79     (&(aobj)->u_swhash[(((idx) >> UAO_SWHASH_CLUSTER_SHIFT) \
   80     & (aobj)->u_swhashmask)])
   81 
   82 /*
   83  * The threshold which determines whether we will use an array or a
   84  * hash table to store the list of allocated swap blocks.
   85  */
   86 #define UAO_SWHASH_THRESHOLD            (UAO_SWHASH_CLUSTER_SIZE * 4)
   87 #define UAO_USES_SWHASH(aobj) \
   88     ((aobj)->u_pages > UAO_SWHASH_THRESHOLD)
   89 
   90 /* The number of buckets in a hash, with an upper bound. */
   91 #define UAO_SWHASH_MAXBUCKETS           256
   92 #define UAO_SWHASH_BUCKETS(pages) \
   93     (min((pages) >> UAO_SWHASH_CLUSTER_SHIFT, UAO_SWHASH_MAXBUCKETS))
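
/*
 * For illustration, with the defaults above: UAO_SWHASH_CLUSTER_SIZE is
 * 16 pages and UAO_SWHASH_THRESHOLD is 64 pages.  A 1000-page aobj thus
 * exceeds the threshold and is hashed; UAO_SWHASH_BUCKETS(1000) is
 * min(1000 >> 4, 256) = 62, and page index 37 is recorded in the element
 * whose tag is 37 >> 4 = 2, at slot index 37 & 15 = 5 within that element.
 */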
   94 
   95 
   96 /*
   97  * uao_swhash_elt: when a hash table is being used, this structure defines
   98  * the format of an entry in the bucket list.
   99  */
  100 struct uao_swhash_elt {
  101         LIST_ENTRY(uao_swhash_elt) list;        /* the hash list */
  102         voff_t tag;                             /* our 'tag' */
  103         int count;                              /* our number of active slots */
  104         int slots[UAO_SWHASH_CLUSTER_SIZE];     /* the slots */
  105 };
  106 
  107 /*
  108  * uao_swhash: the swap hash table structure
  109  */
  110 LIST_HEAD(uao_swhash, uao_swhash_elt);
  111 
  112 /*
  113  * uao_swhash_elt_pool: pool of uao_swhash_elt structures
  114  */
  115 struct pool uao_swhash_elt_pool;
  116 
  117 /*
  118  * uvm_aobj: the actual anon-backed uvm_object
  119  *
  120  * => the uvm_object is at the top of the structure, which allows
  121  *   (struct uvm_aobj *) == (struct uvm_object *)
  122  * => only one of u_swslots and u_swhash is used in any given aobj
  123  */
  124 struct uvm_aobj {
  125         struct uvm_object u_obj; /* has: pgops, memt, #pages, #refs */
  126         int u_pages;             /* number of pages in entire object */
  127         int u_flags;             /* the flags (see uvm_aobj.h) */
  128         /*
  129          * Either an array or hashtable (array of bucket heads) of
  130          * offset -> swapslot mappings for the aobj.
  131          */
  132 #define u_swslots       u_swap.slot_array 
  133 #define u_swhash        u_swap.slot_hash
  134         union swslots {
  135                 int                     *slot_array;
  136                 struct uao_swhash       *slot_hash;
  137         } u_swap;
  138         u_long u_swhashmask;            /* mask for hashtable */
  139         LIST_ENTRY(uvm_aobj) u_list;    /* global list of aobjs */
  140 };
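
/*
 * Illustrative note: because u_obj is the first member, the pager code
 * below converts between the two views with a plain cast:
 *
 *	struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
 *
 * An aobj of 16 pages (at most UAO_SWHASH_THRESHOLD) keeps its swap
 * slots in a 16-entry u_swslots array; a 1000-page aobj keeps them in
 * the u_swhash table instead.
 */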
  141 
  142 struct pool uvm_aobj_pool;
  143 
  144 static struct uao_swhash_elt    *uao_find_swhash_elt(struct uvm_aobj *, int,
  145                                      boolean_t);
  146 static int                       uao_find_swslot(struct uvm_object *, int);
  147 static boolean_t                 uao_flush(struct uvm_object *, voff_t,
  148                                      voff_t, int);
  149 static void                      uao_free(struct uvm_aobj *);
  150 static int                       uao_get(struct uvm_object *, voff_t,
  151                                      vm_page_t *, int *, int, vm_prot_t,
  152                                      int, int);
  153 static boolean_t                 uao_pagein(struct uvm_aobj *, int, int);
  154 static boolean_t                 uao_pagein_page(struct uvm_aobj *, int);
  155 
  156 void    uao_dropswap_range(struct uvm_object *, voff_t, voff_t);
  157 void    uao_shrink_flush(struct uvm_object *, int, int);
  158 int     uao_shrink_hash(struct uvm_object *, int);
  159 int     uao_shrink_array(struct uvm_object *, int);
  160 int     uao_shrink_convert(struct uvm_object *, int);
  161 
  162 int     uao_grow_hash(struct uvm_object *, int);
  163 int     uao_grow_array(struct uvm_object *, int);
  164 int     uao_grow_convert(struct uvm_object *, int);
  165 
  166 /*
  167  * aobj_pager
  168  *
  169  * note that some functions (e.g. put) are handled elsewhere
  170  */
  171 const struct uvm_pagerops aobj_pager = {
  172         .pgo_reference = uao_reference,
  173         .pgo_detach = uao_detach,
  174         .pgo_flush = uao_flush,
  175         .pgo_get = uao_get,
  176 };
  177 
  178 /*
  179  * uao_list: global list of active aobjs, locked by uao_list_lock
  180  *
  181  * Lock ordering: generally the locking order is object lock, then list lock.
  182  * In the case of swap off we have to iterate over the list, and thus the
  183  * ordering is reversed. In that case we must use trylocking to prevent
  184  * deadlock.
  185  */
  186 static LIST_HEAD(aobjlist, uvm_aobj) uao_list = LIST_HEAD_INITIALIZER(uao_list);
  187 static struct mutex uao_list_lock = MUTEX_INITIALIZER(IPL_MPFLOOR);
  188 
  189 
  190 /*
  191  * functions
  192  */
  193 /*
  194  * hash table/array related functions
  195  */
  196 /*
  197  * uao_find_swhash_elt: find (or create) a hash table entry for a page
  198  * offset.
  199  */
  200 static struct uao_swhash_elt *
  201 uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, boolean_t create)
  202 {
  203         struct uao_swhash *swhash;
  204         struct uao_swhash_elt *elt;
  205         voff_t page_tag;
  206 
  207         swhash = UAO_SWHASH_HASH(aobj, pageidx); /* first hash to get bucket */
  208         page_tag = UAO_SWHASH_ELT_TAG(pageidx); /* tag to search for */
  209 
  210         /*
  211          * now search the bucket for the requested tag
  212          */
  213         LIST_FOREACH(elt, swhash, list) {
  214                 if (elt->tag == page_tag)
  215                         return elt;
  216         }
  217 
  218         if (!create)
  219                 return NULL;
  220 
  221         /*
  222          * allocate a new entry for the bucket and init/insert it in
  223          */
  224         elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT | PR_ZERO);
  225         /*
  226          * XXX We cannot sleep here as the hash table might disappear
  227          * from under our feet.  And we run the risk of deadlocking
  228          * the pagedaemon.  In fact this code will only be called by
  229          * the pagedaemon and allocation will only fail if we
  230          * exhausted the pagedaemon reserve.  In that case we're
  231          * doomed anyway, so panic.
  232          */
  233         if (elt == NULL)
  234                 panic("%s: can't allocate entry", __func__);
  235         LIST_INSERT_HEAD(swhash, elt, list);
  236         elt->tag = page_tag;
  237 
  238         return elt;
  239 }
  240 
  241 /*
  242  * uao_find_swslot: find the swap slot number for an aobj/pageidx
  243  */
  244 static inline int
  245 uao_find_swslot(struct uvm_object *uobj, int pageidx)
  246 {
  247         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  248 
  249         KASSERT(UVM_OBJ_IS_AOBJ(uobj));
  250 
  251         /*
  252          * if noswap flag is set, then we never return a slot
  253          */
  254         if (aobj->u_flags & UAO_FLAG_NOSWAP)
  255                 return 0;
  256 
  257         /*
  258          * if hashing, look in hash table.
  259          */
  260         if (UAO_USES_SWHASH(aobj)) {
  261                 struct uao_swhash_elt *elt =
  262                     uao_find_swhash_elt(aobj, pageidx, FALSE);
  263 
  264                 if (elt)
  265                         return UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
  266                 else
  267                         return 0;
  268         }
  269 
  270         /*
  271          * otherwise, look in the array
  272          */
  273         return aobj->u_swslots[pageidx];
  274 }
  275 
  276 /*
  277  * uao_set_swslot: set the swap slot for a page in an aobj.
  278  *
  279  * => setting a slot to zero frees the slot
  280  * => object must be locked by caller
  281  * => we return the old slot number, or -1 if we failed to allocate
  282  *    memory to record the new slot number
  283  */
  284 int
  285 uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
  286 {
  287         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  288         int oldslot;
  289 
  290         KASSERT(rw_write_held(uobj->vmobjlock) || uobj->uo_refs == 0);
  291         KASSERT(UVM_OBJ_IS_AOBJ(uobj));
  292 
  293         /*
  294          * if noswap flag is set, then we can't set a slot
  295          */
  296         if (aobj->u_flags & UAO_FLAG_NOSWAP) {
  297                 if (slot == 0)
  298                         return 0;               /* a clear is ok */
  299 
  300                 /* but a set is not */
  301                 printf("uao_set_swslot: uobj = %p\n", uobj);
  302                 panic("uao_set_swslot: attempt to set a slot on a NOSWAP object");
  303         }
  304 
  305         /*
  306          * are we using a hash table?  if so, add it in the hash.
  307          */
  308         if (UAO_USES_SWHASH(aobj)) {
  309                 /*
  310                  * Avoid allocating an entry just to free it again if
  311                  * the page had no swap slot in the first place, and
  312                  * we are freeing.
  313                  */
  314                 struct uao_swhash_elt *elt =
  315                     uao_find_swhash_elt(aobj, pageidx, slot ? TRUE : FALSE);
  316                 if (elt == NULL) {
  317                         KASSERT(slot == 0);
  318                         return 0;
  319                 }
  320 
  321                 oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
  322                 UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;
  323 
  324                 /*
  325                  * now adjust the elt's reference counter and free it if we've
  326                  * dropped it to zero.
  327                  */
  328                 if (slot) {
  329                         if (oldslot == 0)
  330                                 elt->count++;
  331                 } else {
  332                         if (oldslot)
  333                                 elt->count--;
  334 
  335                         if (elt->count == 0) {
  336                                 LIST_REMOVE(elt, list);
  337                                 pool_put(&uao_swhash_elt_pool, elt);
  338                         }
  339                 }
  340         } else {
  341                 /* we are using an array */
  342                 oldslot = aobj->u_swslots[pageidx];
  343                 aobj->u_swslots[pageidx] = slot;
  344         }
  345         return oldslot;
  346 }
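
/*
 * Usage sketch: a caller releasing the swap resources of a page clears
 * the slot and frees whatever was recorded there, e.g.
 *
 *	slot = uao_set_swslot(uobj, pageidx, 0);
 *	if (slot)
 *		uvm_swap_free(slot, 1);
 *
 * This is exactly what uao_dropswap() below does.
 */
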
  347 /*
  348  * end of hash/array functions
  349  */
  350 
  351 /*
  352  * uao_free: free all resources held by an aobj, and then free the aobj
  353  *
  354  * => the aobj should be dead
  355  */
  356 static void
  357 uao_free(struct uvm_aobj *aobj)
  358 {
  359         struct uvm_object *uobj = &aobj->u_obj;
  360 
  361         KASSERT(UVM_OBJ_IS_AOBJ(uobj));
  362         KASSERT(rw_write_held(uobj->vmobjlock));
  363         uao_dropswap_range(uobj, 0, 0);
  364         rw_exit(uobj->vmobjlock);
  365 
  366         if (UAO_USES_SWHASH(aobj)) {
  367                 /*
  368                  * free the hash table itself.
  369                  */
  370                 hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
  371         } else {
  372                 free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
  373         }
  374 
  375         /*
  376          * finally free the aobj itself
  377          */
  378         uvm_obj_destroy(uobj);
  379         pool_put(&uvm_aobj_pool, aobj);
  380 }
  381 
  382 /*
  383  * pager functions
  384  */
  385 
  386 #ifdef TMPFS
  387 /*
  388  * Shrink an aobj to a given number of pages. The procedure is always the same:
  389  * assess the necessity of data structure conversion (hash to array), secure
  390  * resources, flush pages and drop swap slots.
  391  *
  392  */
  393 
  394 void
  395 uao_shrink_flush(struct uvm_object *uobj, int startpg, int endpg)
  396 {
  397         KASSERT(startpg < endpg);
  398         KASSERT(uobj->uo_refs == 1);
  399         uao_flush(uobj, (voff_t)startpg << PAGE_SHIFT,
  400             (voff_t)endpg << PAGE_SHIFT, PGO_FREE);
  401         uao_dropswap_range(uobj, startpg, endpg);
  402 }
  403 
  404 int
  405 uao_shrink_hash(struct uvm_object *uobj, int pages)
  406 {
  407         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  408         struct uao_swhash *new_swhash;
  409         struct uao_swhash_elt *elt;
  410         unsigned long new_hashmask;
  411         int i;
  412 
  413         KASSERT(UAO_USES_SWHASH(aobj));
  414 
  415         /*
  416          * If the size of the hash table doesn't change, all we need to do is
  417          * to adjust the page count.
  418          */
  419         if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
  420                 uao_shrink_flush(uobj, pages, aobj->u_pages);
  421                 aobj->u_pages = pages;
  422                 return 0;
  423         }
  424 
  425         new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
  426             M_WAITOK | M_CANFAIL, &new_hashmask);
  427         if (new_swhash == NULL)
  428                 return ENOMEM;
  429 
  430         uao_shrink_flush(uobj, pages, aobj->u_pages);
  431 
  432         /*
  433          * Even though the hash table size is changing, the hash of the buckets
  434          * we are interested in copying should not change.
  435          */
  436         for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
  437                 while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
  438                         elt = LIST_FIRST(&aobj->u_swhash[i]);
  439                         LIST_REMOVE(elt, list);
  440                         LIST_INSERT_HEAD(&new_swhash[i], elt, list);
  441                 }
  442         }
  443 
  444         hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
  445 
  446         aobj->u_swhash = new_swhash;
  447         aobj->u_pages = pages;
  448         aobj->u_swhashmask = new_hashmask;
  449 
  450         return 0;
  451 }
  452 
  453 int
  454 uao_shrink_convert(struct uvm_object *uobj, int pages)
  455 {
  456         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  457         struct uao_swhash_elt *elt;
  458         int i, *new_swslots;
  459 
  460         new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
  461             M_WAITOK | M_CANFAIL | M_ZERO);
  462         if (new_swslots == NULL)
  463                 return ENOMEM;
  464 
  465         uao_shrink_flush(uobj, pages, aobj->u_pages);
  466 
  467         /* Convert swap slots from hash to array.  */
  468         for (i = 0; i < pages; i++) {
  469                 elt = uao_find_swhash_elt(aobj, i, FALSE);
  470                 if (elt != NULL) {
  471                         new_swslots[i] = UAO_SWHASH_ELT_PAGESLOT(elt, i);
  472                         if (new_swslots[i] != 0)
  473                                 elt->count--;
  474                         if (elt->count == 0) {
  475                                 LIST_REMOVE(elt, list);
  476                                 pool_put(&uao_swhash_elt_pool, elt);
  477                         }
  478                 }
  479         }
  480 
  481         hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
  482 
  483         aobj->u_swslots = new_swslots;
  484         aobj->u_pages = pages;
  485 
  486         return 0;
  487 }
  488 
  489 int
  490 uao_shrink_array(struct uvm_object *uobj, int pages)
  491 {
  492         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  493         int i, *new_swslots;
  494 
  495         new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
  496             M_WAITOK | M_CANFAIL | M_ZERO);
  497         if (new_swslots == NULL)
  498                 return ENOMEM;
  499 
  500         uao_shrink_flush(uobj, pages, aobj->u_pages);
  501 
  502         for (i = 0; i < pages; i++)
  503                 new_swslots[i] = aobj->u_swslots[i];
  504 
  505         free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
  506 
  507         aobj->u_swslots = new_swslots;
  508         aobj->u_pages = pages;
  509 
  510         return 0;
  511 }
  512 
  513 int
  514 uao_shrink(struct uvm_object *uobj, int pages)
  515 {
  516         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  517 
  518         KASSERT(pages < aobj->u_pages);
  519 
  520         /*
  521          * Distinguish between three possible cases:
  522          * 1. aobj uses hash and must be converted to array.
  523          * 2. aobj uses array and array size needs to be adjusted.
  524          * 3. aobj uses hash and hash size needs to be adjusted.
  525          */
  526         if (pages > UAO_SWHASH_THRESHOLD)
  527                 return uao_shrink_hash(uobj, pages);    /* case 3 */
  528         else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
  529                 return uao_shrink_convert(uobj, pages); /* case 1 */
  530         else
  531                 return uao_shrink_array(uobj, pages);   /* case 2 */
  532 }
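
/*
 * For illustration, with UAO_SWHASH_THRESHOLD = 64 pages: shrinking a
 * 100-page aobj (hash) to 80 pages resizes the hash (case 3), shrinking
 * it to 32 pages converts the hash into an array (case 1), and shrinking
 * a 48-page aobj (array) to 32 pages simply reallocates the array (case 2).
 */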
  533 
  534 /*
  535  * Grow an aobj to a given number of pages. Right now we only adjust the swap
  536  * slots. We could additionally handle page allocation directly, so that
  537  * allocations don't happen through uvm_fault(). That would allow us to use
  538  * a mechanism other than malloc() for the swap slots. It is thus mandatory
  539  * that the caller of these functions does not allow faults to happen in
  540  * case of growth error.
  541  */
  542 int
  543 uao_grow_array(struct uvm_object *uobj, int pages)
  544 {
  545         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  546         int i, *new_swslots;
  547 
  548         KASSERT(aobj->u_pages <= UAO_SWHASH_THRESHOLD);
  549 
  550         new_swslots = mallocarray(pages, sizeof(int), M_UVMAOBJ,
  551             M_WAITOK | M_CANFAIL | M_ZERO);
  552         if (new_swslots == NULL)
  553                 return ENOMEM;
  554 
  555         for (i = 0; i < aobj->u_pages; i++)
  556                 new_swslots[i] = aobj->u_swslots[i];
  557 
  558         free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
  559 
  560         aobj->u_swslots = new_swslots;
  561         aobj->u_pages = pages;
  562 
  563         return 0;
  564 }
  565 
  566 int
  567 uao_grow_hash(struct uvm_object *uobj, int pages)
  568 {
  569         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  570         struct uao_swhash *new_swhash;
  571         struct uao_swhash_elt *elt;
  572         unsigned long new_hashmask;
  573         int i;
  574 
  575         KASSERT(pages > UAO_SWHASH_THRESHOLD);
  576 
  577         /*
  578          * If the size of the hash table doesn't change, all we need to do is
  579          * to adjust the page count.
  580          */
  581         if (UAO_SWHASH_BUCKETS(aobj->u_pages) == UAO_SWHASH_BUCKETS(pages)) {
  582                 aobj->u_pages = pages;
  583                 return 0;
  584         }
  585 
  586         KASSERT(UAO_SWHASH_BUCKETS(aobj->u_pages) < UAO_SWHASH_BUCKETS(pages));
  587 
  588         new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
  589             M_WAITOK | M_CANFAIL, &new_hashmask);
  590         if (new_swhash == NULL)
  591                 return ENOMEM;
  592 
  593         for (i = 0; i < UAO_SWHASH_BUCKETS(aobj->u_pages); i++) {
  594                 while (LIST_EMPTY(&aobj->u_swhash[i]) == 0) {
  595                         elt = LIST_FIRST(&aobj->u_swhash[i]);
  596                         LIST_REMOVE(elt, list);
  597                         LIST_INSERT_HEAD(&new_swhash[i], elt, list);
  598                 }
  599         }
  600 
  601         hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), M_UVMAOBJ);
  602 
  603         aobj->u_swhash = new_swhash;
  604         aobj->u_pages = pages;
  605         aobj->u_swhashmask = new_hashmask;
  606 
  607         return 0;
  608 }
  609 
  610 int
  611 uao_grow_convert(struct uvm_object *uobj, int pages)
  612 {
  613         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  614         struct uao_swhash *new_swhash;
  615         struct uao_swhash_elt *elt;
  616         unsigned long new_hashmask;
  617         int i, *old_swslots;
  618 
  619         new_swhash = hashinit(UAO_SWHASH_BUCKETS(pages), M_UVMAOBJ,
  620             M_WAITOK | M_CANFAIL, &new_hashmask);
  621         if (new_swhash == NULL)
  622                 return ENOMEM;
  623 
  624         /* Set these now, so we can use uao_find_swhash_elt(). */
  625         old_swslots = aobj->u_swslots;
  626         aobj->u_swhash = new_swhash;            
  627         aobj->u_swhashmask = new_hashmask;
  628 
  629         for (i = 0; i < aobj->u_pages; i++) {
  630                 if (old_swslots[i] != 0) {
  631                         elt = uao_find_swhash_elt(aobj, i, TRUE);
  632                         elt->count++;
  633                         UAO_SWHASH_ELT_PAGESLOT(elt, i) = old_swslots[i];
  634                 }
  635         }
  636 
  637         free(old_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
  638         aobj->u_pages = pages;
  639 
  640         return 0;
  641 }
  642 
  643 int
  644 uao_grow(struct uvm_object *uobj, int pages)
  645 {
  646         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  647 
  648         KASSERT(pages > aobj->u_pages);
  649 
  650         /*
  651          * Distinguish between three possible cases:
  652          * 1. aobj uses hash and hash size needs to be adjusted.
  653          * 2. aobj uses array and array size needs to be adjusted.
  654          * 3. aobj uses array and must be converted to hash.
  655          */
  656         if (pages <= UAO_SWHASH_THRESHOLD)
  657                 return uao_grow_array(uobj, pages);     /* case 2 */
  658         else if (aobj->u_pages > UAO_SWHASH_THRESHOLD)
  659                 return uao_grow_hash(uobj, pages);      /* case 1 */
  660         else
  661                 return uao_grow_convert(uobj, pages);   /* case 3 */
  662 }
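
/*
 * The growth path mirrors the shrink path: growing a 48-page aobj
 * (array) to 60 pages stays an array (case 2), growing it to 200 pages
 * converts the array into a hash (case 3), and growing a 100-page aobj
 * (hash) to 200 pages resizes the hash (case 1).
 */
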
  663 #endif /* TMPFS */
  664 
  665 /*
  666  * uao_create: create an aobj of the given size and return its uvm_object.
  667  *
  668  * => for normal use, flags are zero or UAO_FLAG_CANFAIL.
  669  * => for the kernel object, the flags are:
  670  *      UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
  671  *      UAO_FLAG_KERNSWAP - enable swapping of kernel object ("           ")
  672  */
  673 struct uvm_object *
  674 uao_create(vsize_t size, int flags)
  675 {
  676         static struct uvm_aobj kernel_object_store;
  677         static struct rwlock bootstrap_kernel_object_lock;
  678         static int kobj_alloced = 0;
  679         int pages = round_page(size) >> PAGE_SHIFT;
  680         struct uvm_aobj *aobj;
  681         int refs;
  682 
  683         /*
  684          * Allocate a new aobj, unless kernel object is requested.
  685          */
  686         if (flags & UAO_FLAG_KERNOBJ) {
  687                 KASSERT(!kobj_alloced);
  688                 aobj = &kernel_object_store;
  689                 aobj->u_pages = pages;
  690                 aobj->u_flags = UAO_FLAG_NOSWAP;
  691                 refs = UVM_OBJ_KERN;
  692                 kobj_alloced = UAO_FLAG_KERNOBJ;
  693         } else if (flags & UAO_FLAG_KERNSWAP) {
  694                 KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
  695                 aobj = &kernel_object_store;
  696                 kobj_alloced = UAO_FLAG_KERNSWAP;
  697         } else {
  698                 aobj = pool_get(&uvm_aobj_pool, PR_WAITOK);
  699                 aobj->u_pages = pages;
  700                 aobj->u_flags = 0;
  701                 refs = 1;
  702         }
  703 
  704         /*
  705          * allocate hash/array if necessary
  706          */
  707         if (flags == 0 || (flags & (UAO_FLAG_KERNSWAP | UAO_FLAG_CANFAIL))) {
  708                 int mflags;
  709 
  710                 if (flags)
  711                         mflags = M_NOWAIT;
  712                 else
  713                         mflags = M_WAITOK;
  714 
  715                 /* allocate hash table or array depending on object size */
  716                 if (UAO_USES_SWHASH(aobj)) {
  717                         aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(pages),
  718                             M_UVMAOBJ, mflags, &aobj->u_swhashmask);
  719                         if (aobj->u_swhash == NULL) {
  720                                 if (flags & UAO_FLAG_CANFAIL) {
  721                                         pool_put(&uvm_aobj_pool, aobj);
  722                                         return NULL;
  723                                 }
  724                                 panic("uao_create: hashinit swhash failed");
  725                         }
  726                 } else {
  727                         aobj->u_swslots = mallocarray(pages, sizeof(int),
  728                             M_UVMAOBJ, mflags|M_ZERO);
  729                         if (aobj->u_swslots == NULL) {
  730                                 if (flags & UAO_FLAG_CANFAIL) {
  731                                         pool_put(&uvm_aobj_pool, aobj);
  732                                         return NULL;
  733                                 }
  734                                 panic("uao_create: malloc swslots failed");
  735                         }
  736                 }
  737 
  738                 if (flags & UAO_FLAG_KERNSWAP) {
  739                         aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
  740                         return &aobj->u_obj;
  741                         /* done! */
  742                 }
  743         }
  744 
  745         /*
  746          * Initialise UVM object.
  747          */
  748         uvm_obj_init(&aobj->u_obj, &aobj_pager, refs);
  749         if (flags & UAO_FLAG_KERNOBJ) {
  750                 /* Use a temporary static lock for kernel_object. */
  751                 rw_init(&bootstrap_kernel_object_lock, "kobjlk");
  752                 uvm_obj_setlock(&aobj->u_obj, &bootstrap_kernel_object_lock);
  753         }
  754 
  755         /*
  756          * now that aobj is ready, add it to the global list
  757          */
  758         mtx_enter(&uao_list_lock);
  759         LIST_INSERT_HEAD(&uao_list, aobj, u_list);
  760         mtx_leave(&uao_list_lock);
  761 
  762         return &aobj->u_obj;
  763 }
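
/*
 * Minimal usage sketch (the size is rounded up to whole pages by
 * uao_create() itself):
 *
 *	struct uvm_object *uobj;
 *
 *	uobj = uao_create(len, 0);	/* object starts with one reference */
 *	...
 *	uao_detach(uobj);		/* drop it; the last ref frees the aobj */
 *
 * Kernel callers that need anonymous, swap-backed memory objects (for
 * example the TMPFS code that uses the grow/shrink helpers above)
 * create them this way.
 */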
  764 
  765 
  766 
  767 /*
  768  * uao_init: set up aobj pager subsystem
  769  *
  770  * => called at boot time from uvm_pager_init()
  771  */
  772 void
  773 uao_init(void)
  774 {
  775         /*
  776          * NOTE: Pages for this pool must not come from a pageable
  777          * kernel map!
  778          */
  779         pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt), 0,
  780             IPL_NONE, PR_WAITOK, "uaoeltpl", NULL);
  781         pool_init(&uvm_aobj_pool, sizeof(struct uvm_aobj), 0,
  782             IPL_NONE, PR_WAITOK, "aobjpl", NULL);
  783 }
  784 
  785 /*
  786  * uao_reference: hold a reference to an anonymous UVM object.
  787  */
  788 void
  789 uao_reference(struct uvm_object *uobj)
  790 {
  791         /* Kernel object is persistent. */
  792         if (UVM_OBJ_IS_KERN_OBJECT(uobj))
  793                 return;
  794 
  795         atomic_inc_int(&uobj->uo_refs);
  796 }
  797 
  798 
  799 /*
  800  * uao_detach: drop a reference to an anonymous UVM object.
  801  */
  802 void
  803 uao_detach(struct uvm_object *uobj)
  804 {
  805         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  806         struct vm_page *pg;
  807 
  808         /*
  809          * Detaching from kernel_object is a NOP.
  810          */
  811         if (UVM_OBJ_IS_KERN_OBJECT(uobj))
  812                 return;
  813 
  814         /*
  815          * Drop the reference.  If it was the last one, destroy the object.
  816          */
  817         if (atomic_dec_int_nv(&uobj->uo_refs) > 0) {
  818                 return;
  819         }
  820 
  821         /*
  822          * Remove the aobj from the global list.
  823          */
  824         mtx_enter(&uao_list_lock);
  825         LIST_REMOVE(aobj, u_list);
  826         mtx_leave(&uao_list_lock);
  827 
  828         /*
  829          * Free all the pages left in the aobj.  For each page, when the
  830          * page is no longer busy (and thus after any disk I/O that it is
  831          * involved in is complete), release any swap resources and free
  832          * the page itself.
  833          */
  834         rw_enter(uobj->vmobjlock, RW_WRITE);
  835         while ((pg = RBT_ROOT(uvm_objtree, &uobj->memt)) != NULL) {
  836                 pmap_page_protect(pg, PROT_NONE);
  837                 if (pg->pg_flags & PG_BUSY) {
  838                         uvm_pagewait(pg, uobj->vmobjlock, "uao_det");
  839                         rw_enter(uobj->vmobjlock, RW_WRITE);
  840                         continue;
  841                 }
  842                 uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
  843                 uvm_lock_pageq();
  844                 uvm_pagefree(pg);
  845                 uvm_unlock_pageq();
  846         }
  847 
  848         /*
  849          * Finally, free the anonymous UVM object itself.
  850          */
  851         uao_free(aobj);
  852 }
  853 
  854 /*
  855  * uao_flush: flush pages out of a uvm object
  856  *
  857  * => if PGO_CLEANIT is not set, then we will not block.
  858  * => if PGO_ALLPAGE is set, then all pages in the object are valid targets
  859  *      for flushing.
  860  * => NOTE: we are allowed to lock the page queues, so the caller
  861  *      must not be holding the lock on them [e.g. pagedaemon had
  862  *      better not call us with the queues locked]
  863  * => we return TRUE unless we encountered some sort of I/O error
  864  *      XXXJRT currently never happens, as we never directly initiate
  865  *      XXXJRT I/O
  866  */
  867 boolean_t
  868 uao_flush(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
  869 {
  870         struct uvm_aobj *aobj = (struct uvm_aobj *) uobj;
  871         struct vm_page *pg;
  872         voff_t curoff;
  873 
  874         KASSERT(UVM_OBJ_IS_AOBJ(uobj));
  875         KASSERT(rw_write_held(uobj->vmobjlock));
  876 
  877         if (flags & PGO_ALLPAGES) {
  878                 start = 0;
  879                 stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
  880         } else {
  881                 start = trunc_page(start);
  882                 stop = round_page(stop);
  883                 if (stop > ((voff_t)aobj->u_pages << PAGE_SHIFT)) {
  884                         printf("uao_flush: strange, got an out of range "
  885                             "flush (fixed)\n");
  886                         stop = (voff_t)aobj->u_pages << PAGE_SHIFT;
  887                 }
  888         }
  889 
  890         /*
  891          * Don't need to do any work here if we're not freeing
  892          * or deactivating pages.
  893          */
  894         if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
  895                 return TRUE;
  896         }
  897 
  898         curoff = start;
  899         for (;;) {
  900                 if (curoff < stop) {
  901                         pg = uvm_pagelookup(uobj, curoff);
  902                         curoff += PAGE_SIZE;
  903                         if (pg == NULL)
  904                                 continue;
  905                 } else {
  906                         break;
  907                 }
  908 
  909                 /* Make sure page is unbusy, else wait for it. */
  910                 if (pg->pg_flags & PG_BUSY) {
  911                         uvm_pagewait(pg, uobj->vmobjlock, "uaoflsh");
  912                         rw_enter(uobj->vmobjlock, RW_WRITE);
  913                         curoff -= PAGE_SIZE;
  914                         continue;
  915                 }
  916 
  917                 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
  918                 /*
  919                  * XXX In these first 3 cases, we always just
  920                  * XXX deactivate the page.  We may want to
  921                  * XXX handle the different cases more specifically
  922                  * XXX in the future.
  923                  */
  924                 case PGO_CLEANIT|PGO_FREE:
  925                         /* FALLTHROUGH */
  926                 case PGO_CLEANIT|PGO_DEACTIVATE:
  927                         /* FALLTHROUGH */
  928                 case PGO_DEACTIVATE:
  929  deactivate_it:
  930                         if (pg->wire_count != 0)
  931                                 continue;
  932 
  933                         uvm_lock_pageq();
  934                         pmap_page_protect(pg, PROT_NONE);
  935                         uvm_pagedeactivate(pg);
  936                         uvm_unlock_pageq();
  937 
  938                         continue;
  939                 case PGO_FREE:
  940                         /*
  941                          * If there are multiple references to
  942                          * the object, just deactivate the page.
  943                          */
  944                         if (uobj->uo_refs > 1)
  945                                 goto deactivate_it;
  946 
  947                         /* XXX skip the page if it's wired */
  948                         if (pg->wire_count != 0)
  949                                 continue;
  950 
  951                         /*
  952                          * free the swap slot and the page.
  953                          */
  954                         pmap_page_protect(pg, PROT_NONE);
  955 
  956                         /*
  957                          * freeing swapslot here is not strictly necessary.
  958                          * however, leaving it here doesn't save much
  959                          * because we need to update swap accounting anyway.
  960                          */
  961                         uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
  962                         uvm_lock_pageq();
  963                         uvm_pagefree(pg);
  964                         uvm_unlock_pageq();
  965 
  966                         continue;
  967                 default:
  968                         panic("uao_flush: weird flags");
  969                 }
  970         }
  971 
  972         return TRUE;
  973 }
  974 
  975 /*
  976  * uao_get: fetch me a page
  977  *
  978  * we have three cases:
  979  * 1: page is resident     -> just return the page.
  980  * 2: page is zero-fill    -> allocate a new page and zero it.
  981  * 3: page is swapped out  -> fetch the page from swap.
  982  *
  983  * case 1 can be handled with PGO_LOCKED, cases 2 and 3 cannot.
  984  * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
  985  * then we will need to return VM_PAGER_UNLOCK.
  986  *
  987  * => flags: PGO_ALLPAGES: get all of the pages
  988  *           PGO_LOCKED: fault data structures are locked
  989  * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
  990  * => NOTE: caller must check for released pages!!
  991  */
  992 static int
  993 uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
  994     int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
  995 {
  996         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  997         voff_t current_offset;
  998         vm_page_t ptmp;
  999         int lcv, gotpages, maxpages, swslot, rv, pageidx;
 1000         boolean_t done;
 1001 
 1002         KASSERT(UVM_OBJ_IS_AOBJ(uobj));
 1003         KASSERT(rw_write_held(uobj->vmobjlock));
 1004 
 1005         /*
 1006          * get number of pages
 1007          */
 1008         maxpages = *npagesp;
 1009 
 1010         if (flags & PGO_LOCKED) {
 1011                 /*
 1012                  * step 1a: get pages that are already resident.   only do
 1013                  * this if the data structures are locked (i.e. the first
 1014                  * time through).
 1015                  */
 1016 
 1017                 done = TRUE;    /* be optimistic */
 1018                 gotpages = 0;   /* # of pages we got so far */
 1019 
 1020                 for (lcv = 0, current_offset = offset ; lcv < maxpages ;
 1021                     lcv++, current_offset += PAGE_SIZE) {
 1022                         /* do we care about this page?  if not, skip it */
 1023                         if (pps[lcv] == PGO_DONTCARE)
 1024                                 continue;
 1025 
 1026                         ptmp = uvm_pagelookup(uobj, current_offset);
 1027 
 1028                         /*
 1029                          * if page is new, attempt to allocate the page,
 1030                          * zero-fill'd.
 1031                          */
 1032                         if (ptmp == NULL && uao_find_swslot(uobj,
 1033                             current_offset >> PAGE_SHIFT) == 0) {
 1034                                 ptmp = uvm_pagealloc(uobj, current_offset,
 1035                                     NULL, UVM_PGA_ZERO);
 1036                                 if (ptmp) {
 1037                                         /* new page */
 1038                                         atomic_clearbits_int(&ptmp->pg_flags,
 1039                                             PG_BUSY|PG_FAKE);
 1040                                         atomic_setbits_int(&ptmp->pg_flags,
 1041                                             PQ_AOBJ);
 1042                                         UVM_PAGE_OWN(ptmp, NULL);
 1043                                 }
 1044                         }
 1045 
 1046                         /*
 1047                          * to be useful we must get a non-busy page
 1048                          */
 1049                         if (ptmp == NULL ||
 1050                             (ptmp->pg_flags & PG_BUSY) != 0) {
 1051                                 if (lcv == centeridx ||
 1052                                     (flags & PGO_ALLPAGES) != 0)
 1053                                         /* need to do a wait or I/O! */
 1054                                         done = FALSE;   
 1055                                 continue;
 1056                         }
 1057 
 1058                         /*
 1059                          * useful page: plug it in our result array
 1060                          */
 1061                         atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
 1062                         UVM_PAGE_OWN(ptmp, "uao_get1");
 1063                         pps[lcv] = ptmp;
 1064                         gotpages++;
 1065 
 1066                 }
 1067 
 1068                 /*
 1069                  * step 1b: now we've either done everything needed or we
 1070                  * need to unlock and do some waiting or I/O.
 1071                  */
 1072                 *npagesp = gotpages;
 1073                 if (done)
 1074                         /* bingo! */
 1075                         return VM_PAGER_OK;     
 1076                 else
 1077                         /* EEK!   Need to unlock and I/O */
 1078                         return VM_PAGER_UNLOCK;
 1079         }
 1080 
 1081         /*
 1082          * step 2: get non-resident or busy pages.
 1083          * data structures are unlocked.
 1084          */
 1085         for (lcv = 0, current_offset = offset ; lcv < maxpages ;
 1086             lcv++, current_offset += PAGE_SIZE) {
 1087                 /*
 1088                  * - skip over pages we've already gotten or don't want
 1089                  * - skip over pages we don't _have_ to get
 1090                  */
 1091                 if (pps[lcv] != NULL ||
 1092                     (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
 1093                         continue;
 1094 
 1095                 pageidx = current_offset >> PAGE_SHIFT;
 1096 
 1097                 /*
 1098                  * we have yet to locate the current page (pps[lcv]).   we
 1099                  * first look for a page that is already at the current offset.
 1100                  * if we find a page, we check to see if it is busy or
 1101                  * released.  if that is the case, then we sleep on the page
 1102                  * until it is no longer busy or released and repeat the lookup.
 1103                  * if the page we found is neither busy nor released, then we
 1104                  * busy it (so we own it) and plug it into pps[lcv].   this
 1105                  * 'break's the following while loop and indicates we are
 1106                  * ready to move on to the next page in the "lcv" loop above.
 1107                  *
 1108                  * if we exit the while loop with pps[lcv] still set to NULL,
 1109                  * then it means that we allocated a new busy/fake/clean page
 1110                  * ptmp in the object and we need to do I/O to fill in the data.
 1111                  */
 1112 
 1113                 /* top of "pps" while loop */
 1114                 while (pps[lcv] == NULL) {
 1115                         /* look for a resident page */
 1116                         ptmp = uvm_pagelookup(uobj, current_offset);
 1117 
 1118                         /* not resident?   allocate one now (if we can) */
 1119                         if (ptmp == NULL) {
 1120 
 1121                                 ptmp = uvm_pagealloc(uobj, current_offset,
 1122                                     NULL, 0);
 1123 
 1124                                 /* out of RAM? */
 1125                                 if (ptmp == NULL) {
 1126                                         rw_exit(uobj->vmobjlock);
 1127                                         uvm_wait("uao_getpage");
 1128                                         rw_enter(uobj->vmobjlock, RW_WRITE);
 1129                                         /* goto top of pps while loop */
 1130                                         continue;
 1131                                 }
 1132 
 1133                                 /*
 1134                                  * safe with PQ's unlocked: because we just
 1135                                  * alloc'd the page
 1136                                  */
 1137                                 atomic_setbits_int(&ptmp->pg_flags, PQ_AOBJ);
 1138 
 1139                                 /* 
 1140                                  * got new page ready for I/O.  break pps while
 1141                                  * loop.  pps[lcv] is still NULL.
 1142                                  */
 1143                                 break;
 1144                         }
 1145 
 1146                         /* page is there, see if we need to wait on it */
 1147                         if ((ptmp->pg_flags & PG_BUSY) != 0) {
 1148                                 uvm_pagewait(ptmp, uobj->vmobjlock, "uao_get");
 1149                                 rw_enter(uobj->vmobjlock, RW_WRITE);
 1150                                 continue;       /* goto top of pps while loop */
 1151                         }
 1152 
 1153                         /*
 1154                          * if we get here then the page is resident and
 1155                          * unbusy.  we busy it now (so we own it).
 1156                          */
 1157                         /* we own it, caller must un-busy */
 1158                         atomic_setbits_int(&ptmp->pg_flags, PG_BUSY);
 1159                         UVM_PAGE_OWN(ptmp, "uao_get2");
 1160                         pps[lcv] = ptmp;
 1161                 }
 1162 
 1163                 /*
 1164                  * if we own the valid page at the correct offset, pps[lcv] will
 1165                  * point to it.   nothing more to do except go to the next page.
 1166                  */
 1167                 if (pps[lcv])
 1168                         continue;                       /* next lcv */
 1169 
 1170                 /*
 1171                  * we have a "fake/busy/clean" page that we just allocated.  
 1172                  * do the needed "i/o", either reading from swap or zeroing.
 1173                  */
 1174                 swslot = uao_find_swslot(uobj, pageidx);
 1175 
 1176                 /* just zero the page if there's nothing in swap.  */
 1177                 if (swslot == 0) {
 1178                         /* page hasn't existed before, just zero it. */
 1179                         uvm_pagezero(ptmp);
 1180                 } else {
 1181                         /*
 1182                          * page in the swapped-out page.
 1183                          * unlock object for i/o, relock when done.
 1184                          */
 1185 
 1186                         rw_exit(uobj->vmobjlock);
 1187                         rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
 1188                         rw_enter(uobj->vmobjlock, RW_WRITE);
 1189 
 1190                         /*
 1191                          * I/O done.  check for errors.
 1192                          */
 1193                         if (rv != VM_PAGER_OK) {
 1194                                 /*
 1195                                  * remove the swap slot from the aobj
 1196                                  * and mark the aobj as having no real slot.
 1197                                  * don't free the swap slot, thus preventing
 1198                                  * it from being used again.
 1199                                  */
 1200                                 swslot = uao_set_swslot(&aobj->u_obj, pageidx,
 1201                                                         SWSLOT_BAD);
 1202                                 uvm_swap_markbad(swslot, 1);
 1203 
 1204                                 if (ptmp->pg_flags & PG_WANTED)
 1205                                         wakeup(ptmp);
 1206                                 atomic_clearbits_int(&ptmp->pg_flags,
 1207                                     PG_WANTED|PG_BUSY);
 1208                                 UVM_PAGE_OWN(ptmp, NULL);
 1209                                 uvm_lock_pageq();
 1210                                 uvm_pagefree(ptmp);
 1211                                 uvm_unlock_pageq();
 1212                                 rw_exit(uobj->vmobjlock);
 1213 
 1214                                 return rv;
 1215                         }
 1216                 }
 1217 
 1218                 /*
 1219                  * we got the page!   clear the fake flag (indicates valid
 1220                  * data now in page) and plug into our result array.   note
 1221                  * that page is still busy.
 1222                  *
 1223                  * it is the caller's job to:
 1224                  * => check if the page is released
 1225                  * => unbusy the page
 1226                  * => activate the page
 1227                  */
 1228                 atomic_clearbits_int(&ptmp->pg_flags, PG_FAKE);
 1229                 pmap_clear_modify(ptmp);                /* ... and clean */
 1230                 pps[lcv] = ptmp;
 1231 
 1232         }       /* lcv loop */
 1233 
 1234         rw_exit(uobj->vmobjlock);
 1235         return VM_PAGER_OK;
 1236 }
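
/*
 * A minimal single-page caller can be found in uao_pagein_page() below:
 * it passes npages = 1, centeridx = 0 and no PGO_LOCKED, so uao_get()
 * sleeps for memory or I/O as needed and returns with the object lock
 * released:
 *
 *	npages = 1;
 *	rv = uao_get(uobj, (voff_t)pageidx << PAGE_SHIFT,
 *	    &pg, &npages, 0, PROT_READ | PROT_WRITE, 0, 0);
 *
 * On VM_PAGER_OK the page is returned busy and it is the caller's job
 * to un-busy and activate (or deactivate) it.
 */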
 1237 
 1238 /*
 1239  * uao_dropswap:  release any swap resources from this aobj page.
 1240  *
 1241  * => aobj must be locked or have a reference count of 0.
 1242  */
 1243 int
 1244 uao_dropswap(struct uvm_object *uobj, int pageidx)
 1245 {
 1246         int slot;
 1247 
 1248         KASSERT(UVM_OBJ_IS_AOBJ(uobj));
 1249 
 1250         slot = uao_set_swslot(uobj, pageidx, 0);
 1251         if (slot) {
 1252                 uvm_swap_free(slot, 1);
 1253         }
 1254         return slot;
 1255 }
 1256 
 1257 /*
 1258  * page in every page in every aobj that is paged-out to a range of swslots.
 1259  * 
 1260  * => aobj must be locked and is returned locked.
 1261  * => returns TRUE if pagein was aborted due to lack of memory.
 1262  */
 1263 boolean_t
 1264 uao_swap_off(int startslot, int endslot)
 1265 {
 1266         struct uvm_aobj *aobj;
 1267 
 1268         /*
 1269          * Walk the list of all anonymous UVM objects.  Grab the first.
 1270          */
 1271         mtx_enter(&uao_list_lock);
 1272         if ((aobj = LIST_FIRST(&uao_list)) == NULL) {
 1273                 mtx_leave(&uao_list_lock);
 1274                 return FALSE;
 1275         }
 1276         uao_reference(&aobj->u_obj);
 1277 
 1278         do {
 1279                 struct uvm_aobj *nextaobj;
 1280                 boolean_t rv;
 1281 
 1282                 /*
 1283                  * Prefetch the next object and immediately hold a reference
 1284                  * on it, so neither the current nor the next entry could
 1285                  * disappear while we are iterating.
 1286                  */
 1287                 if ((nextaobj = LIST_NEXT(aobj, u_list)) != NULL) {
 1288                         uao_reference(&nextaobj->u_obj);
 1289                 }
 1290                 mtx_leave(&uao_list_lock);
 1291 
 1292                 /*
 1293                  * Page in all pages in the swap slot range.
 1294                  */
 1295                 rw_enter(aobj->u_obj.vmobjlock, RW_WRITE);
 1296                 rv = uao_pagein(aobj, startslot, endslot);
 1297                 rw_exit(aobj->u_obj.vmobjlock);
 1298 
 1299                 /* Drop the reference of the current object. */
 1300                 uao_detach(&aobj->u_obj);
 1301                 if (rv) {
 1302                         if (nextaobj) {
 1303                                 uao_detach(&nextaobj->u_obj);
 1304                         }
 1305                         return rv;
 1306                 }
 1307 
 1308                 aobj = nextaobj;
 1309                 mtx_enter(&uao_list_lock);
 1310         } while (aobj);
 1311 
 1312         /*
 1313          * done with traversal, unlock the list
 1314          */
 1315         mtx_leave(&uao_list_lock);
 1316         return FALSE;
 1317 }
 1318 
 1319 /*
 1320  * page in any pages from aobj in the given range.
 1321  *
 1322  * => returns TRUE if pagein was aborted due to lack of memory.
 1323  */
 1324 static boolean_t
 1325 uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
 1326 {
 1327         boolean_t rv;
 1328 
 1329         if (UAO_USES_SWHASH(aobj)) {
 1330                 struct uao_swhash_elt *elt;
 1331                 int bucket;
 1332 
 1333 restart:
 1334                 for (bucket = aobj->u_swhashmask; bucket >= 0; bucket--) {
 1335                         for (elt = LIST_FIRST(&aobj->u_swhash[bucket]);
 1336                              elt != NULL;
 1337                              elt = LIST_NEXT(elt, list)) {
 1338                                 int i;
 1339 
 1340                                 for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
 1341                                         int slot = elt->slots[i];
 1342 
 1343                                         /*
 1344                                          * if the slot isn't in range, skip it.
 1345                                          */
 1346                                         if (slot < startslot ||
 1347                                             slot >= endslot) {
 1348                                                 continue;
 1349                                         }
 1350 
 1351                                         /*
 1352                                          * process the page,
 1353                                          * then start over on this object
 1354                                          * since the swhash elt
 1355                                          * may have been freed.
 1356                                          */
 1357                                         rv = uao_pagein_page(aobj,
 1358                                           UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
 1359                                         if (rv) {
 1360                                                 return rv;
 1361                                         }
 1362                                         goto restart;
 1363                                 }
 1364                         }
 1365                 }
 1366         } else {
 1367                 int i;
 1368 
 1369                 for (i = 0; i < aobj->u_pages; i++) {
 1370                         int slot = aobj->u_swslots[i];
 1371 
 1372                         /*
 1373                          * if the slot isn't in range, skip it
 1374                          */
 1375                         if (slot < startslot || slot >= endslot) {
 1376                                 continue;
 1377                         }
 1378 
 1379                         /*
 1380                          * process the page.
 1381                          */
 1382                         rv = uao_pagein_page(aobj, i);
 1383                         if (rv) {
 1384                                 return rv;
 1385                         }
 1386                 }
 1387         }
 1388 
 1389         return FALSE;
 1390 }
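
uao_pagein() restarts its hash scan from the top (goto restart) after each page it brings in, because uao_pagein_page() can free the swhash element the scan was positioned on. The scan still terminates: a successfully paged-in page has its swap slot cleared, so it no longer matches [startslot, endslot) on the next pass. Below is a minimal sketch of the same restart idiom; struct entry, process_entry() and scan_entries() are hypothetical stand-ins, not UVM interfaces.

#include <sys/queue.h>

struct entry {
        LIST_ENTRY(entry) e_link;
        int e_slot;
};
LIST_HEAD(entryhead, entry);

/*
 * Hypothetical: processes one entry and, on success, is expected to clear
 * e_slot or unlink the entry (the way uao_pagein_page() clears the swap
 * slot), so the restarted scan does not revisit it forever.  It may also
 * free entries, which is why the iterator cannot be trusted afterwards.
 * Returns nonzero on failure.
 */
int process_entry(struct entry *);

int
scan_entries(struct entryhead *head, int lo, int hi)
{
        struct entry *e;

restart:
        LIST_FOREACH(e, head, e_link) {
                if (e->e_slot < lo || e->e_slot >= hi)
                        continue;       /* out of range, skip */
                if (process_entry(e))
                        return 1;       /* abort and report failure */
                goto restart;           /* list may have changed; rescan */
        }
        return 0;
}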
 1391 
 1392 /*
 1393  * uao_pagein_page: page in a single page from an anonymous UVM object.
 1394  *
 1395  * => Returns TRUE if pagein was aborted due to lack of memory.
 1396  */
 1397 static boolean_t
 1398 uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
 1399 {
 1400         struct uvm_object *uobj = &aobj->u_obj;
 1401         struct vm_page *pg;
 1402         int rv, slot, npages;
 1403 
 1404         pg = NULL;
 1405         npages = 1;
 1406 
 1407         KASSERT(rw_write_held(uobj->vmobjlock));
 1408         rv = uao_get(&aobj->u_obj, (voff_t)pageidx << PAGE_SHIFT,
 1409             &pg, &npages, 0, PROT_READ | PROT_WRITE, 0, 0);
 1410 
 1411         /*
 1412          * relock and finish up.
 1413          */
 1414         rw_enter(uobj->vmobjlock, RW_WRITE);
 1415         switch (rv) {
 1416         case VM_PAGER_OK:
 1417                 break;
 1418 
 1419         case VM_PAGER_ERROR:
 1420         case VM_PAGER_REFAULT:
 1421                 /*
 1422                  * nothing more to do on errors.
 1423                  * VM_PAGER_REFAULT can only mean that the anon was freed,
 1424                  * so again there's nothing to do.
 1425                  */
 1426                 return FALSE;
 1427         }
 1428 
 1429         /*
 1430          * ok, we've got the page now.
 1431          * mark it as dirty, clear its swslot and un-busy it.
 1432          */
 1433         slot = uao_set_swslot(&aobj->u_obj, pageidx, 0);
 1434         uvm_swap_free(slot, 1);
 1435         atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_CLEAN|PG_FAKE);
 1436         UVM_PAGE_OWN(pg, NULL);
 1437 
 1438         /*
 1439          * deactivate the page (to put it on a page queue).
 1440          */
 1441         pmap_clear_reference(pg);
 1442         uvm_lock_pageq();
 1443         uvm_pagedeactivate(pg);
 1444         uvm_unlock_pageq();
 1445 
 1446         return FALSE;
 1447 }
 1448 
 1449 /*
 1450  * uao_dropswap_range: drop swapslots in the range.
 1451  *
 1452  * => aobj must be locked and is returned locked.
 1453  * => start is inclusive.  end is exclusive.  (A usage sketch follows the function.)
 1454  */
 1455 void
 1456 uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
 1457 {
 1458         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
 1459         int swpgonlydelta = 0;
 1460 
 1461         KASSERT(UVM_OBJ_IS_AOBJ(uobj));
 1462         KASSERT(rw_write_held(uobj->vmobjlock));
 1463 
 1464         if (end == 0) {
 1465                 end = INT64_MAX;
 1466         }
 1467 
 1468         if (UAO_USES_SWHASH(aobj)) {
 1469                 int i, hashbuckets = aobj->u_swhashmask + 1;
 1470                 voff_t taghi;
 1471                 voff_t taglo;
 1472 
 1473                 taglo = UAO_SWHASH_ELT_TAG(start);
 1474                 taghi = UAO_SWHASH_ELT_TAG(end);
 1475 
 1476                 for (i = 0; i < hashbuckets; i++) {
 1477                         struct uao_swhash_elt *elt, *next;
 1478 
 1479                         for (elt = LIST_FIRST(&aobj->u_swhash[i]);
 1480                              elt != NULL;
 1481                              elt = next) {
 1482                                 int startidx, endidx;
 1483                                 int j;
 1484 
 1485                                 next = LIST_NEXT(elt, list);
 1486 
 1487                                 if (elt->tag < taglo || taghi < elt->tag) {
 1488                                         continue;
 1489                                 }
 1490 
 1491                                 if (elt->tag == taglo) {
 1492                                         startidx =
 1493                                             UAO_SWHASH_ELT_PAGESLOT_IDX(start);
 1494                                 } else {
 1495                                         startidx = 0;
 1496                                 }
 1497 
 1498                                 if (elt->tag == taghi) {
 1499                                         endidx =
 1500                                             UAO_SWHASH_ELT_PAGESLOT_IDX(end);
 1501                                 } else {
 1502                                         endidx = UAO_SWHASH_CLUSTER_SIZE;
 1503                                 }
 1504 
 1505                                 for (j = startidx; j < endidx; j++) {
 1506                                         int slot = elt->slots[j];
 1507 
 1508                                         KASSERT(uvm_pagelookup(&aobj->u_obj,
 1509                                             (voff_t)(UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
 1510                                             + j) << PAGE_SHIFT) == NULL);
 1511 
 1512                                         if (slot > 0) {
 1513                                                 uvm_swap_free(slot, 1);
 1514                                                 swpgonlydelta++;
 1515                                                 KASSERT(elt->count > 0);
 1516                                                 elt->slots[j] = 0;
 1517                                                 elt->count--;
 1518                                         }
 1519                                 }
 1520 
 1521                                 if (elt->count == 0) {
 1522                                         LIST_REMOVE(elt, list);
 1523                                         pool_put(&uao_swhash_elt_pool, elt);
 1524                                 }
 1525                         }
 1526                 }
 1527         } else {
 1528                 int i;
 1529 
 1530                 if (aobj->u_pages < end) {
 1531                         end = aobj->u_pages;
 1532                 }
 1533                 for (i = start; i < end; i++) {
 1534                         int slot = aobj->u_swslots[i];
 1535 
 1536                         if (slot > 0) {
 1537                                 uvm_swap_free(slot, 1);
 1538                                 swpgonlydelta++;
 1539                         }
 1540                 }
 1541         }
 1542 
 1543         /*
 1544          * adjust the counter of pages only in swap for all
 1545          * the swap slots we've freed.
 1546          */
 1547         if (swpgonlydelta > 0) {
 1548                 KASSERT(uvmexp.swpgonly >= swpgonlydelta);
 1549                 atomic_add_int(&uvmexp.swpgonly, -swpgonlydelta);
 1550         }
 1551 }
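
For completeness, a hypothetical caller of uao_dropswap_range() might look like the sketch below; example_drop_swap() is not part of this file and simply follows the contract stated above: the object must be an aobj, its vmobjlock must be write-held across the call (and is still held on return), and passing 0 for end means "to the end of the object", matching the INT64_MAX substitution at the top of the function.

/* Sketch only: release the swap space backing pages [start, end). */
void
example_drop_swap(struct uvm_object *uobj, voff_t start, voff_t end)
{
        KASSERT(UVM_OBJ_IS_AOBJ(uobj));

        rw_enter(uobj->vmobjlock, RW_WRITE);
        uao_dropswap_range(uobj, start, end);   /* lock stays held */
        rw_exit(uobj->vmobjlock);
}

As a worked example of the hash clipping done inside the function (assuming the element tag is the page index divided by the cluster size and the pageslot index is the remainder, with a cluster size of 16 chosen purely for illustration): dropping [35, 53) gives taglo = 2 and taghi = 3, so the tag-2 element is trimmed to indices [3, 16) (pages 35-47) and the tag-3 element to [0, 5) (pages 48-52), which together cover exactly the requested range.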
