FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_aobj.c


    1 /*      $NetBSD: uvm_aobj.c,v 1.82.2.1 2007/08/24 16:52:25 liamjfoy Exp $       */
    2 
    3 /*
    4  * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
    5  *                    Washington University.
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. All advertising materials mentioning features or use of this software
   17  *    must display the following acknowledgement:
   18  *      This product includes software developed by Charles D. Cranor and
   19  *      Washington University.
   20  * 4. The name of the author may not be used to endorse or promote products
   21  *    derived from this software without specific prior written permission.
   22  *
   23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   28  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   32  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   33  *
   34  * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
   35  */
   36 /*
   37  * uvm_aobj.c: anonymous memory uvm_object pager
   38  *
   39  * author: Chuck Silvers <chuq@chuq.com>
   40  * started: Jan-1998
   41  *
   42  * - design mostly from Chuck Cranor
   43  */
   44 
   45 #include <sys/cdefs.h>
   46 __KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.82.2.1 2007/08/24 16:52:25 liamjfoy Exp $");
   47 
   48 #include "opt_uvmhist.h"
   49 
   50 #include <sys/param.h>
   51 #include <sys/systm.h>
   52 #include <sys/proc.h>
   53 #include <sys/malloc.h>
   54 #include <sys/kernel.h>
   55 #include <sys/pool.h>
    56 
   57 
   58 #include <uvm/uvm.h>
   59 
   60 /*
   61  * an aobj manages anonymous-memory backed uvm_objects.   in addition
   62  * to keeping the list of resident pages, it also keeps a list of
   63  * allocated swap blocks.  depending on the size of the aobj this list
   64  * of allocated swap blocks is either stored in an array (small objects)
   65  * or in a hash table (large objects).
   66  */
   67 
   68 /*
   69  * local structures
   70  */
   71 
   72 /*
   73  * for hash tables, we break the address space of the aobj into blocks
   74  * of UAO_SWHASH_CLUSTER_SIZE pages.   we require the cluster size to
   75  * be a power of two.
   76  */
   77 
   78 #define UAO_SWHASH_CLUSTER_SHIFT 4
   79 #define UAO_SWHASH_CLUSTER_SIZE (1 << UAO_SWHASH_CLUSTER_SHIFT)
   80 
   81 /* get the "tag" for this page index */
   82 #define UAO_SWHASH_ELT_TAG(PAGEIDX) \
   83         ((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT)
   84 
   85 #define UAO_SWHASH_ELT_PAGESLOT_IDX(PAGEIDX) \
   86         ((PAGEIDX) & (UAO_SWHASH_CLUSTER_SIZE - 1))
   87 
   88 /* given an ELT and a page index, find the swap slot */
   89 #define UAO_SWHASH_ELT_PAGESLOT(ELT, PAGEIDX) \
   90         ((ELT)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(PAGEIDX)])
   91 
   92 /* given an ELT, return its pageidx base */
   93 #define UAO_SWHASH_ELT_PAGEIDX_BASE(ELT) \
   94         ((ELT)->tag << UAO_SWHASH_CLUSTER_SHIFT)
   95 
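      /*
       * Editorial worked example (not part of the original source): with
       * UAO_SWHASH_CLUSTER_SHIFT == 4 the cluster size is 16 pages, so for
       * page index 0x37 the macros above evaluate as:
       *
       *      UAO_SWHASH_ELT_TAG(0x37)          == 0x37 >> 4       == 0x3
       *      UAO_SWHASH_ELT_PAGESLOT_IDX(0x37) == 0x37 & (16 - 1) == 0x7
       *      UAO_SWHASH_ELT_PAGEIDX_BASE(elt)  == 0x3 << 4        == 0x30
       *
       * i.e. page 0x37 occupies slot 7 of the element whose tag is 3.
       */
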
   96 /*
   97  * the swhash hash function
   98  */
   99 
  100 #define UAO_SWHASH_HASH(AOBJ, PAGEIDX) \
  101         (&(AOBJ)->u_swhash[(((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT) \
  102                             & (AOBJ)->u_swhashmask)])
  103 
  104 /*
  105  * the swhash threshold determines if we will use an array or a
  106  * hash table to store the list of allocated swap blocks.
  107  */
  108 
  109 #define UAO_SWHASH_THRESHOLD (UAO_SWHASH_CLUSTER_SIZE * 4)
  110 #define UAO_USES_SWHASH(AOBJ) \
  111         ((AOBJ)->u_pages > UAO_SWHASH_THRESHOLD)        /* use hash? */
  112 
  113 /*
  114  * the number of buckets in a swhash, with an upper bound
  115  */
  116 
  117 #define UAO_SWHASH_MAXBUCKETS 256
  118 #define UAO_SWHASH_BUCKETS(AOBJ) \
  119         (MIN((AOBJ)->u_pages >> UAO_SWHASH_CLUSTER_SHIFT, \
  120              UAO_SWHASH_MAXBUCKETS))
  121 
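      /*
       * Editorial note (not part of the original source): the threshold
       * above is 16 * 4 == 64 pages, so an aobj of up to 64 pages (256 KB
       * assuming 4 KB pages) keeps its swap slots in a flat array, while
       * anything larger switches to the hash table.  For a 1024-page aobj
       * the bucket count is MIN(1024 >> 4, 256) == 64.
       */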
  122 
  123 /*
  124  * uao_swhash_elt: when a hash table is being used, this structure defines
  125  * the format of an entry in the bucket list.
  126  */
  127 
  128 struct uao_swhash_elt {
  129         LIST_ENTRY(uao_swhash_elt) list;        /* the hash list */
  130         voff_t tag;                             /* our 'tag' */
  131         int count;                              /* our number of active slots */
  132         int slots[UAO_SWHASH_CLUSTER_SIZE];     /* the slots */
  133 };
  134 
  135 /*
  136  * uao_swhash: the swap hash table structure
  137  */
  138 
  139 LIST_HEAD(uao_swhash, uao_swhash_elt);
  140 
  141 /*
  142  * uao_swhash_elt_pool: pool of uao_swhash_elt structures
  143  * NOTE: Pages for this pool must not come from a pageable kernel map!
  144  */
  145 POOL_INIT(uao_swhash_elt_pool, sizeof(struct uao_swhash_elt), 0, 0, 0,
  146     "uaoeltpl", NULL);
  147 
  148 /*
  149  * uvm_aobj: the actual anon-backed uvm_object
  150  *
  151  * => the uvm_object is at the top of the structure, which allows
  152  *   (struct uvm_aobj *) == (struct uvm_object *)
  153  * => only one of u_swslots and u_swhash is used in any given aobj
  154  */
  155 
  156 struct uvm_aobj {
  157         struct uvm_object u_obj; /* has: lock, pgops, memq, #pages, #refs */
  158         pgoff_t u_pages;         /* number of pages in entire object */
  159         int u_flags;             /* the flags (see uvm_aobj.h) */
  160         int *u_swslots;          /* array of offset->swapslot mappings */
  161                                  /*
  162                                   * hashtable of offset->swapslot mappings
  163                                   * (u_swhash is an array of bucket heads)
  164                                   */
  165         struct uao_swhash *u_swhash;
  166         u_long u_swhashmask;            /* mask for hashtable */
  167         LIST_ENTRY(uvm_aobj) u_list;    /* global list of aobjs */
  168 };
  169 
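      /*
       * Editorial note (not part of the original source): because u_obj is
       * the first member, a generic uvm_object pointer can be converted
       * back with a plain cast, the idiom used by the pager entry points
       * in this file:
       *
       *      struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
       */
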
  170 /*
  171  * uvm_aobj_pool: pool of uvm_aobj structures
  172  */
  173 POOL_INIT(uvm_aobj_pool, sizeof(struct uvm_aobj), 0, 0, 0, "aobjpl",
  174     &pool_allocator_nointr);
  175 
  176 MALLOC_DEFINE(M_UVMAOBJ, "UVM aobj", "UVM aobj and related structures");
  177 
  178 /*
  179  * local functions
  180  */
  181 
  182 static void     uao_free(struct uvm_aobj *);
  183 static int      uao_get(struct uvm_object *, voff_t, struct vm_page **,
  184                     int *, int, vm_prot_t, int, int);
  185 static boolean_t uao_put(struct uvm_object *, voff_t, voff_t, int);
  186 
  187 #if defined(VMSWAP)
  188 static struct uao_swhash_elt *uao_find_swhash_elt
  189     (struct uvm_aobj *, int, boolean_t);
  190 
  191 static boolean_t uao_pagein(struct uvm_aobj *, int, int);
  192 static boolean_t uao_pagein_page(struct uvm_aobj *, int);
  193 static void uao_dropswap_range1(struct uvm_aobj *, voff_t, voff_t);
  194 #endif /* defined(VMSWAP) */
  195 
  196 /*
  197  * aobj_pager
  198  *
  199  * note that some functions (e.g. put) are handled elsewhere
  200  */
  201 
  202 struct uvm_pagerops aobj_pager = {
  203         NULL,                   /* init */
  204         uao_reference,          /* reference */
  205         uao_detach,             /* detach */
  206         NULL,                   /* fault */
  207         uao_get,                /* get */
  208         uao_put,                /* flush */
  209 };
  210 
  211 /*
  212  * uao_list: global list of active aobjs, locked by uao_list_lock
  213  */
  214 
  215 static LIST_HEAD(aobjlist, uvm_aobj) uao_list;
  216 static struct simplelock uao_list_lock;
  217 
  218 /*
  219  * functions
  220  */
  221 
  222 /*
  223  * hash table/array related functions
  224  */
  225 
  226 #if defined(VMSWAP)
  227 
  228 /*
  229  * uao_find_swhash_elt: find (or create) a hash table entry for a page
  230  * offset.
  231  *
  232  * => the object should be locked by the caller
  233  */
  234 
  235 static struct uao_swhash_elt *
  236 uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, boolean_t create)
  237 {
  238         struct uao_swhash *swhash;
  239         struct uao_swhash_elt *elt;
  240         voff_t page_tag;
  241 
  242         swhash = UAO_SWHASH_HASH(aobj, pageidx);
  243         page_tag = UAO_SWHASH_ELT_TAG(pageidx);
  244 
  245         /*
  246          * now search the bucket for the requested tag
  247          */
  248 
  249         LIST_FOREACH(elt, swhash, list) {
  250                 if (elt->tag == page_tag) {
  251                         return elt;
  252                 }
  253         }
  254         if (!create) {
  255                 return NULL;
  256         }
  257 
  258         /*
  259          * allocate a new entry for the bucket and init/insert it in
  260          */
  261 
  262         elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT);
  263         if (elt == NULL) {
  264                 return NULL;
  265         }
  266         LIST_INSERT_HEAD(swhash, elt, list);
  267         elt->tag = page_tag;
  268         elt->count = 0;
  269         memset(elt->slots, 0, sizeof(elt->slots));
  270         return elt;
  271 }
  272 
  273 /*
  274  * uao_find_swslot: find the swap slot number for an aobj/pageidx
  275  *
  276  * => object must be locked by caller
  277  */
  278 
  279 int
  280 uao_find_swslot(struct uvm_object *uobj, int pageidx)
  281 {
  282         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  283         struct uao_swhash_elt *elt;
  284 
  285         /*
  286          * if noswap flag is set, then we never return a slot
  287          */
  288 
  289         if (aobj->u_flags & UAO_FLAG_NOSWAP)
  290                 return(0);
  291 
  292         /*
  293          * if hashing, look in hash table.
  294          */
  295 
  296         if (UAO_USES_SWHASH(aobj)) {
  297                 elt = uao_find_swhash_elt(aobj, pageidx, FALSE);
  298                 if (elt)
  299                         return(UAO_SWHASH_ELT_PAGESLOT(elt, pageidx));
  300                 else
  301                         return(0);
  302         }
  303 
  304         /*
  305          * otherwise, look in the array
  306          */
  307 
  308         return(aobj->u_swslots[pageidx]);
  309 }
  310 
  311 /*
  312  * uao_set_swslot: set the swap slot for a page in an aobj.
  313  *
  314  * => setting a slot to zero frees the slot
  315  * => object must be locked by caller
  316  * => we return the old slot number, or -1 if we failed to allocate
  317  *    memory to record the new slot number
  318  */
  319 
  320 int
  321 uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
  322 {
  323         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  324         struct uao_swhash_elt *elt;
  325         int oldslot;
  326         UVMHIST_FUNC("uao_set_swslot"); UVMHIST_CALLED(pdhist);
  327         UVMHIST_LOG(pdhist, "aobj %p pageidx %d slot %d",
  328             aobj, pageidx, slot, 0);
  329 
  330         /*
  331          * if noswap flag is set, then we can't set a non-zero slot.
  332          */
  333 
  334         if (aobj->u_flags & UAO_FLAG_NOSWAP) {
  335                 if (slot == 0)
  336                         return(0);
  337 
  338                 printf("uao_set_swslot: uobj = %p\n", uobj);
  339                 panic("uao_set_swslot: NOSWAP object");
  340         }
  341 
  342         /*
  343          * are we using a hash table?  if so, add it in the hash.
  344          */
  345 
  346         if (UAO_USES_SWHASH(aobj)) {
  347 
  348                 /*
  349                  * Avoid allocating an entry just to free it again if
  350                  * the page had no swap slot in the first place, and
  351                  * we are freeing.
  352                  */
  353 
  354                 elt = uao_find_swhash_elt(aobj, pageidx, slot != 0);
  355                 if (elt == NULL) {
  356                         return slot ? -1 : 0;
  357                 }
  358 
  359                 oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
  360                 UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;
  361 
  362                 /*
  363                  * now adjust the elt's reference counter and free it if we've
  364                  * dropped it to zero.
  365                  */
  366 
  367                 if (slot) {
  368                         if (oldslot == 0)
  369                                 elt->count++;
  370                 } else {
  371                         if (oldslot)
  372                                 elt->count--;
  373 
  374                         if (elt->count == 0) {
  375                                 LIST_REMOVE(elt, list);
  376                                 pool_put(&uao_swhash_elt_pool, elt);
  377                         }
  378                 }
  379         } else {
  380                 /* we are using an array */
  381                 oldslot = aobj->u_swslots[pageidx];
  382                 aobj->u_swslots[pageidx] = slot;
  383         }
  384         return (oldslot);
  385 }
  386 
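      /*
       * Editorial usage sketch (hypothetical caller, not part of the
       * original source): with the object locked, record a new swap slot
       * for page 5 and later release it; slot 0 always means "no slot":
       *
       *      int old;
       *
       *      old = uao_set_swslot(uobj, 5, newslot);
       *      if (old == -1)
       *              goto fail;                      // no memory for a hash entry
       *      ...
       *      old = uao_set_swslot(uobj, 5, 0);       // drop the mapping
       *      if (old > 0)
       *              uvm_swap_free(old, 1);          // free the swap space itself
       *
       * the second half is exactly what uao_dropswap() does.
       */
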
  387 #endif /* defined(VMSWAP) */
  388 
  389 /*
  390  * end of hash/array functions
  391  */
  392 
  393 /*
  394  * uao_free: free all resources held by an aobj, and then free the aobj
  395  *
  396  * => the aobj should be dead
  397  */
  398 
  399 static void
  400 uao_free(struct uvm_aobj *aobj)
  401 {
  402         int swpgonlydelta = 0;
  403 
  404 #if defined(VMSWAP)
  405         uao_dropswap_range1(aobj, 0, 0);
  406 #endif /* defined(VMSWAP) */
  407 
  408         simple_unlock(&aobj->u_obj.vmobjlock);
  409 
  410 #if defined(VMSWAP)
  411         if (UAO_USES_SWHASH(aobj)) {
  412 
  413                 /*
  414                  * free the hash table itself.
  415                  */
  416 
  417                 free(aobj->u_swhash, M_UVMAOBJ);
  418         } else {
  419 
  420                 /*
  421                  * free the array itself.
  422                  */
  423 
  424                 free(aobj->u_swslots, M_UVMAOBJ);
  425         }
  426 #endif /* defined(VMSWAP) */
  427 
  428         /*
  429          * finally free the aobj itself
  430          */
  431 
  432         pool_put(&uvm_aobj_pool, aobj);
  433 
  434         /*
  435          * adjust the counter of pages only in swap for all
  436          * the swap slots we've freed.
  437          */
  438 
  439         if (swpgonlydelta > 0) {
  440                 simple_lock(&uvm.swap_data_lock);
  441                 KASSERT(uvmexp.swpgonly >= swpgonlydelta);
  442                 uvmexp.swpgonly -= swpgonlydelta;
  443                 simple_unlock(&uvm.swap_data_lock);
  444         }
  445 }
  446 
  447 /*
  448  * pager functions
  449  */
  450 
  451 /*
  452  * uao_create: create an aobj of the given size and return its uvm_object.
  453  *
  454  * => for normal use, flags are always zero
  455  * => for the kernel object, the flags are:
  456  *      UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
  457  *      UAO_FLAG_KERNSWAP - enable swapping of kernel object ("           ")
  458  */
  459 
  460 struct uvm_object *
  461 uao_create(vsize_t size, int flags)
  462 {
  463         static struct uvm_aobj kernel_object_store;
  464         static int kobj_alloced = 0;
  465         pgoff_t pages = round_page(size) >> PAGE_SHIFT;
  466         struct uvm_aobj *aobj;
  467         int refs;
  468 
  469         /*
  470          * malloc a new aobj unless we are asked for the kernel object
  471          */
  472 
  473         if (flags & UAO_FLAG_KERNOBJ) {
  474                 KASSERT(!kobj_alloced);
  475                 aobj = &kernel_object_store;
  476                 aobj->u_pages = pages;
  477                 aobj->u_flags = UAO_FLAG_NOSWAP;
  478                 refs = UVM_OBJ_KERN;
  479                 kobj_alloced = UAO_FLAG_KERNOBJ;
  480         } else if (flags & UAO_FLAG_KERNSWAP) {
  481                 KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
  482                 aobj = &kernel_object_store;
  483                 kobj_alloced = UAO_FLAG_KERNSWAP;
  484                 refs = 0xdeadbeaf; /* XXX: gcc */
  485         } else {
  486                 aobj = pool_get(&uvm_aobj_pool, PR_WAITOK);
  487                 aobj->u_pages = pages;
  488                 aobj->u_flags = 0;
  489                 refs = 1;
  490         }
  491 
  492         /*
  493          * allocate hash/array if necessary
  494          *
  495          * note: in the KERNSWAP case there is no need to worry about locking:
  496          * we are still booting, so we should be the only thread around.
  497          */
  498 
  499         if (flags == 0 || (flags & UAO_FLAG_KERNSWAP) != 0) {
  500 #if defined(VMSWAP)
  501                 int mflags = (flags & UAO_FLAG_KERNSWAP) != 0 ?
  502                     M_NOWAIT : M_WAITOK;
  503 
  504                 /* allocate hash table or array depending on object size */
  505                 if (UAO_USES_SWHASH(aobj)) {
  506                         aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(aobj),
  507                             HASH_LIST, M_UVMAOBJ, mflags, &aobj->u_swhashmask);
  508                         if (aobj->u_swhash == NULL)
  509                                 panic("uao_create: hashinit swhash failed");
  510                 } else {
  511                         aobj->u_swslots = malloc(pages * sizeof(int),
  512                             M_UVMAOBJ, mflags);
  513                         if (aobj->u_swslots == NULL)
  514                                 panic("uao_create: malloc swslots failed");
  515                         memset(aobj->u_swslots, 0, pages * sizeof(int));
  516                 }
  517 #endif /* defined(VMSWAP) */
  518 
  519                 if (flags) {
  520                         aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
  521                         return(&aobj->u_obj);
  522                 }
  523         }
  524 
  525         /*
  526          * init aobj fields
  527          */
  528 
  529         UVM_OBJ_INIT(&aobj->u_obj, &aobj_pager, refs);
  530 
  531         /*
  532          * now that aobj is ready, add it to the global list
  533          */
  534 
  535         simple_lock(&uao_list_lock);
  536         LIST_INSERT_HEAD(&uao_list, aobj, u_list);
  537         simple_unlock(&uao_list_lock);
  538         return(&aobj->u_obj);
  539 }
  540 
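      /*
       * Editorial usage sketch (hypothetical caller with illustrative
       * names, not part of the original source): a typical consumer
       * creates an aobj and maps it, dropping its reference on failure:
       *
       *      struct uvm_object *uobj;
       *
       *      uobj = uao_create(size, 0);     // created with one reference
       *      error = uvm_map(map, &va, size, uobj, 0, 0, flags);
       *      if (error)
       *              uao_detach(uobj);       // dropping the last ref frees it
       */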
  541 
  542 
  543 /*
  544  * uao_init: set up aobj pager subsystem
  545  *
  546  * => called at boot time from uvm_pager_init()
  547  */
  548 
  549 void
  550 uao_init(void)
  551 {
  552         static int uao_initialized;
  553 
  554         if (uao_initialized)
  555                 return;
  556         uao_initialized = TRUE;
  557         LIST_INIT(&uao_list);
  558         simple_lock_init(&uao_list_lock);
  559 }
  560 
  561 /*
  562  * uao_reference: add a ref to an aobj
  563  *
  564  * => aobj must be unlocked
  565  * => just lock it and call the locked version
  566  */
  567 
  568 void
  569 uao_reference(struct uvm_object *uobj)
  570 {
  571         simple_lock(&uobj->vmobjlock);
  572         uao_reference_locked(uobj);
  573         simple_unlock(&uobj->vmobjlock);
  574 }
  575 
  576 /*
  577  * uao_reference_locked: add a ref to an aobj that is already locked
  578  *
  579  * => aobj must be locked
  580  * this needs to be separate from the normal routine
  581  * since sometimes we need to add a reference to an aobj when
  582  * it's already locked.
  583  */
  584 
  585 void
  586 uao_reference_locked(struct uvm_object *uobj)
  587 {
  588         UVMHIST_FUNC("uao_reference"); UVMHIST_CALLED(maphist);
  589 
  590         /*
  591          * kernel_object already has plenty of references, leave it alone.
  592          */
  593 
  594         if (UVM_OBJ_IS_KERN_OBJECT(uobj))
  595                 return;
  596 
  597         uobj->uo_refs++;
  598         UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)",
  599                     uobj, uobj->uo_refs,0,0);
  600 }
  601 
  602 /*
  603  * uao_detach: drop a reference to an aobj
  604  *
  605  * => aobj must be unlocked
  606  * => just lock it and call the locked version
  607  */
  608 
  609 void
  610 uao_detach(struct uvm_object *uobj)
  611 {
  612         simple_lock(&uobj->vmobjlock);
  613         uao_detach_locked(uobj);
  614 }
  615 
  616 /*
  617  * uao_detach_locked: drop a reference to an aobj
  618  *
  619  * => aobj must be locked, and is unlocked (or freed) upon return.
  620  * this needs to be separate from the normal routine
  621  * since sometimes we need to detach from an aobj when
  622  * it's already locked.
  623  */
  624 
  625 void
  626 uao_detach_locked(struct uvm_object *uobj)
  627 {
  628         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  629         struct vm_page *pg;
  630         UVMHIST_FUNC("uao_detach"); UVMHIST_CALLED(maphist);
  631 
  632         /*
  633          * detaching from kernel_object is a noop.
  634          */
  635 
  636         if (UVM_OBJ_IS_KERN_OBJECT(uobj)) {
  637                 simple_unlock(&uobj->vmobjlock);
  638                 return;
  639         }
  640 
  641         UVMHIST_LOG(maphist,"  (uobj=0x%x)  ref=%d", uobj,uobj->uo_refs,0,0);
  642         uobj->uo_refs--;
  643         if (uobj->uo_refs) {
  644                 simple_unlock(&uobj->vmobjlock);
  645                 UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
  646                 return;
  647         }
  648 
  649         /*
  650          * remove the aobj from the global list.
  651          */
  652 
  653         simple_lock(&uao_list_lock);
  654         LIST_REMOVE(aobj, u_list);
  655         simple_unlock(&uao_list_lock);
  656 
  657         /*
  658          * free all the pages left in the aobj.  for each page,
  659          * when the page is no longer busy (and thus after any disk i/o that
  660          * it's involved in is complete), release any swap resources and
  661          * free the page itself.
  662          */
  663 
  664         uvm_lock_pageq();
  665         while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL) {
  666                 pmap_page_protect(pg, VM_PROT_NONE);
  667                 if (pg->flags & PG_BUSY) {
  668                         pg->flags |= PG_WANTED;
  669                         uvm_unlock_pageq();
  670                         UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, FALSE,
  671                             "uao_det", 0);
  672                         simple_lock(&uobj->vmobjlock);
  673                         uvm_lock_pageq();
  674                         continue;
  675                 }
  676                 uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
  677                 uvm_pagefree(pg);
  678         }
  679         uvm_unlock_pageq();
  680 
  681         /*
  682          * finally, free the aobj itself.
  683          */
  684 
  685         uao_free(aobj);
  686 }
  687 
  688 /*
  689  * uao_put: flush pages out of a uvm object
  690  *
  691  * => object should be locked by caller.  we may _unlock_ the object
  692  *      if (and only if) we need to clean a page (PGO_CLEANIT).
  693  *      XXXJRT Currently, however, we don't.  In the case of cleaning
  694  *      XXXJRT a page, we simply just deactivate it.  Should probably
  695  *      XXXJRT handle this better, in the future (although "flushing"
  696  *      XXXJRT anonymous memory isn't terribly important).
  697  * => if PGO_CLEANIT is not set, then we will neither unlock the object
  698  *      nor block.
  699  * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
  700  *      for flushing.
  701  * => NOTE: we rely on the fact that the object's memq is a TAILQ and
  702  *      that new pages are inserted on the tail end of the list.  thus,
  703  *      we can make a complete pass through the object in one go by starting
  704  *      at the head and working towards the tail (new pages are put in
  705  *      front of us).
  706  * => NOTE: we are allowed to lock the page queues, so the caller
  707  *      must not be holding the lock on them [e.g. pagedaemon had
  708  *      better not call us with the queues locked]
  709  * => we return TRUE unless we encountered some sort of I/O error
  710  *      XXXJRT currently never happens, as we never directly initiate
  711  *      XXXJRT I/O
  712  *
  713  * note on page traversal:
  714  *      we can traverse the pages in an object either by going down the
  715  *      linked list in "uobj->memq", or we can go over the address range
  716  *      by page doing hash table lookups for each address.  depending
  717  *      on how many pages are in the object it may be cheaper to do one
  718  *      or the other.  we set "by_list" to true if we are using memq.
  719  *      if the cost of a hash lookup was equal to the cost of the list
  720  *      traversal we could compare the number of pages in the start->stop
  721  *      range to the total number of pages in the object.  however, it
  722  *      seems that a hash table lookup is more expensive than the linked
  723  *      list traversal, so we multiply the number of pages in the
  724  *      start->stop range by a penalty which we define below.
  725  */
  726 
  727 static int
  728 uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
  729 {
  730         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  731         struct vm_page *pg, *nextpg, curmp, endmp;
  732         boolean_t by_list;
  733         voff_t curoff;
  734         UVMHIST_FUNC("uao_put"); UVMHIST_CALLED(maphist);
  735 
  736         curoff = 0;
  737         if (flags & PGO_ALLPAGES) {
  738                 start = 0;
  739                 stop = aobj->u_pages << PAGE_SHIFT;
  740                 by_list = TRUE;         /* always go by the list */
  741         } else {
  742                 start = trunc_page(start);
  743                 if (stop == 0) {
  744                         stop = aobj->u_pages << PAGE_SHIFT;
  745                 } else {
  746                         stop = round_page(stop);
  747                 }
  748                 if (stop > (aobj->u_pages << PAGE_SHIFT)) {
  749                         printf("uao_flush: strange, got an out of range "
  750                             "flush (fixed)\n");
  751                         stop = aobj->u_pages << PAGE_SHIFT;
  752                 }
  753                 by_list = (uobj->uo_npages <=
  754                     ((stop - start) >> PAGE_SHIFT) * UVM_PAGE_HASH_PENALTY);
  755         }
  756         UVMHIST_LOG(maphist,
  757             " flush start=0x%lx, stop=0x%x, by_list=%d, flags=0x%x",
  758             start, stop, by_list, flags);
  759 
  760         /*
  761          * Don't need to do any work here if we're not freeing
  762          * or deactivating pages.
  763          */
  764 
  765         if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
  766                 simple_unlock(&uobj->vmobjlock);
  767                 return 0;
  768         }
  769 
  770         /*
  771          * Initialize the marker pages.  See the comment in
  772          * genfs_putpages() also.
  773          */
  774 
  775         curmp.uobject = uobj;
  776         curmp.offset = (voff_t)-1;
  777         curmp.flags = PG_BUSY;
  778         endmp.uobject = uobj;
  779         endmp.offset = (voff_t)-1;
  780         endmp.flags = PG_BUSY;
  781 
  782         /*
  783          * now do it.  note: we must update nextpg in the body of loop or we
  784          * will get stuck.  we need to use nextpg if we'll traverse the list
  785          * because we may free "pg" before doing the next loop.
  786          */
  787 
  788         if (by_list) {
  789                 TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq);
  790                 nextpg = TAILQ_FIRST(&uobj->memq);
  791                 PHOLD(curlwp);
  792         } else {
  793                 curoff = start;
  794                 nextpg = NULL;  /* Quell compiler warning */
  795         }
  796 
  797         uvm_lock_pageq();
  798 
  799         /* locked: both page queues and uobj */
  800         for (;;) {
  801                 if (by_list) {
  802                         pg = nextpg;
  803                         if (pg == &endmp)
  804                                 break;
  805                         nextpg = TAILQ_NEXT(pg, listq);
  806                         if (pg->offset < start || pg->offset >= stop)
  807                                 continue;
  808                 } else {
  809                         if (curoff < stop) {
  810                                 pg = uvm_pagelookup(uobj, curoff);
  811                                 curoff += PAGE_SIZE;
  812                         } else
  813                                 break;
  814                         if (pg == NULL)
  815                                 continue;
  816                 }
  817                 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
  818 
  819                 /*
  820                  * XXX In these first 3 cases, we always just
  821                  * XXX deactivate the page.  We may want to
  822                  * XXX handle the different cases more specifically
  823                  * XXX in the future.
  824                  */
  825 
  826                 case PGO_CLEANIT|PGO_FREE:
  827                 case PGO_CLEANIT|PGO_DEACTIVATE:
  828                 case PGO_DEACTIVATE:
  829  deactivate_it:
  830                         /* skip the page if it's loaned or wired */
  831                         if (pg->loan_count != 0 || pg->wire_count != 0)
  832                                 continue;
  833 
  834                         /* ...and deactivate the page. */
  835                         pmap_clear_reference(pg);
  836                         uvm_pagedeactivate(pg);
  837                         continue;
  838 
  839                 case PGO_FREE:
  840 
  841                         /*
  842                          * If there are multiple references to
  843                          * the object, just deactivate the page.
  844                          */
  845 
  846                         if (uobj->uo_refs > 1)
  847                                 goto deactivate_it;
  848 
  849                         /*
  850                          * wait and try again if the page is busy.
  851                          * otherwise free the swap slot and the page.
  852                          */
  853 
  854                         pmap_page_protect(pg, VM_PROT_NONE);
  855                         if (pg->flags & PG_BUSY) {
  856                                 if (by_list) {
  857                                         TAILQ_INSERT_BEFORE(pg, &curmp, listq);
  858                                 }
  859                                 pg->flags |= PG_WANTED;
  860                                 uvm_unlock_pageq();
  861                                 UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
  862                                     "uao_put", 0);
  863                                 simple_lock(&uobj->vmobjlock);
  864                                 uvm_lock_pageq();
  865                                 if (by_list) {
  866                                         nextpg = TAILQ_NEXT(&curmp, listq);
  867                                         TAILQ_REMOVE(&uobj->memq, &curmp,
  868                                             listq);
  869                                 } else
  870                                         curoff -= PAGE_SIZE;
  871                                 continue;
  872                         }
  873 
  874                         /*
  875                          * freeing swapslot here is not strictly necessary.
  876                          * however, leaving it here doesn't save much
  877                          * because we need to update swap accounting anyway.
  878                          */
  879 
  880                         uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
  881                         uvm_pagefree(pg);
  882                         continue;
  883                 }
  884         }
  885         uvm_unlock_pageq();
  886         if (by_list) {
  887                 TAILQ_REMOVE(&uobj->memq, &endmp, listq);
  888                 PRELE(curlwp);
  889         }
  890         simple_unlock(&uobj->vmobjlock);
  891         return 0;
  892 }
  893 
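      /*
       * Editorial usage sketch (hypothetical caller, not part of the
       * original source): callers reach uao_put() through the pager
       * operation vector; freeing every page of an aobj looks like:
       *
       *      simple_lock(&uobj->vmobjlock);
       *      (void)(uobj->pgops->pgo_put)(uobj, 0, 0,
       *          PGO_FREE | PGO_ALLPAGES);
       *      // the object is unlocked on return
       */
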
  894 /*
  895  * uao_get: fetch me a page
  896  *
  897  * we have three cases:
  898  * 1: page is resident     -> just return the page.
  899  * 2: page is zero-fill    -> allocate a new page and zero it.
  900  * 3: page is swapped out  -> fetch the page from swap.
  901  *
  902  * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot.
  903  * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
  904  * then we will need to return EBUSY.
  905  *
  906  * => prefer map unlocked (not required)
  907  * => object must be locked!  we will _unlock_ it before starting any I/O.
  908  * => flags: PGO_ALLPAGES: get all of the pages
  909  *           PGO_LOCKED: fault data structures are locked
  910  * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
  911  * => NOTE: caller must check for released pages!!
  912  */
  913 
  914 static int
  915 uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
  916     int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
  917 {
  918 #if defined(VMSWAP)
  919         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  920 #endif /* defined(VMSWAP) */
  921         voff_t current_offset;
  922         struct vm_page *ptmp = NULL;    /* Quell compiler warning */
  923         int lcv, gotpages, maxpages, swslot, pageidx;
  924         boolean_t done;
  925         UVMHIST_FUNC("uao_get"); UVMHIST_CALLED(pdhist);
  926 
  927         UVMHIST_LOG(pdhist, "aobj=%p offset=%d, flags=%d",
  928                     (struct uvm_aobj *)uobj, offset, flags,0);
  929 
  930         /*
  931          * get number of pages
  932          */
  933 
  934         maxpages = *npagesp;
  935 
  936         /*
  937          * step 1: handle the case where fault data structures are locked.
  938          */
  939 
  940         if (flags & PGO_LOCKED) {
  941 
  942                 /*
  943                  * step 1a: get pages that are already resident.   only do
  944                  * this if the data structures are locked (i.e. the first
  945                  * time through).
  946                  */
  947 
  948                 done = TRUE;    /* be optimistic */
  949                 gotpages = 0;   /* # of pages we got so far */
  950                 for (lcv = 0, current_offset = offset ; lcv < maxpages ;
  951                     lcv++, current_offset += PAGE_SIZE) {
  952                         /* do we care about this page?  if not, skip it */
  953                         if (pps[lcv] == PGO_DONTCARE)
  954                                 continue;
  955                         ptmp = uvm_pagelookup(uobj, current_offset);
  956 
  957                         /*
  958                          * if page is new, attempt to allocate the page,
  959                          * zero-fill'd.
  960                          */
  961 
  962                         if (ptmp == NULL && uao_find_swslot(&aobj->u_obj,
  963                             current_offset >> PAGE_SHIFT) == 0) {
  964                                 ptmp = uvm_pagealloc(uobj, current_offset,
  965                                     NULL, UVM_PGA_ZERO);
  966                                 if (ptmp) {
  967                                         /* new page */
  968                                         ptmp->flags &= ~(PG_FAKE);
  969                                         ptmp->pqflags |= PQ_AOBJ;
  970                                         goto gotpage;
  971                                 }
  972                         }
  973 
  974                         /*
  975                          * to be useful must get a non-busy page
  976                          */
  977 
  978                         if (ptmp == NULL || (ptmp->flags & PG_BUSY) != 0) {
  979                                 if (lcv == centeridx ||
  980                                     (flags & PGO_ALLPAGES) != 0)
  981                                         /* need to do a wait or I/O! */
  982                                         done = FALSE;
  983                                 continue;
  984                         }
  985 
  986                         /*
  987                          * useful page: busy/lock it and plug it in our
  988                          * result array
  989                          */
  990 
  991                         /* caller must un-busy this page */
  992                         ptmp->flags |= PG_BUSY;
  993                         UVM_PAGE_OWN(ptmp, "uao_get1");
  994 gotpage:
  995                         pps[lcv] = ptmp;
  996                         gotpages++;
  997                 }
  998 
  999                 /*
 1000                  * step 1b: now we've either done everything needed or we need
 1001                  * to unlock and do some waiting or I/O.
 1002                  */
 1003 
 1004                 UVMHIST_LOG(pdhist, "<- done (done=%d)", done, 0,0,0);
 1005                 *npagesp = gotpages;
 1006                 if (done)
 1007                         return 0;
 1008                 else
 1009                         return EBUSY;
 1010         }
 1011 
 1012         /*
 1013          * step 2: get non-resident or busy pages.
 1014          * object is locked.   data structures are unlocked.
 1015          */
 1016 
 1017         if ((flags & PGO_SYNCIO) == 0) {
 1018                 goto done;
 1019         }
 1020 
 1021         for (lcv = 0, current_offset = offset ; lcv < maxpages ;
 1022             lcv++, current_offset += PAGE_SIZE) {
 1023 
 1024                 /*
 1025                  * - skip over pages we've already gotten or don't want
 1026                  * - skip over pages we don't _have_ to get
 1027                  */
 1028 
 1029                 if (pps[lcv] != NULL ||
 1030                     (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
 1031                         continue;
 1032 
 1033                 pageidx = current_offset >> PAGE_SHIFT;
 1034 
 1035                 /*
 1036                  * we have yet to locate the current page (pps[lcv]).   we
 1037                  * first look for a page that is already at the current offset.
 1038                  * if we find a page, we check to see if it is busy or
 1039                  * released.  if that is the case, then we sleep on the page
 1040                  * until it is no longer busy or released and repeat the lookup.
 1041                  * if the page we found is neither busy nor released, then we
 1042                  * busy it (so we own it) and plug it into pps[lcv].   this
 1043                  * 'break's the following while loop and indicates we are
 1044                  * ready to move on to the next page in the "lcv" loop above.
 1045                  *
 1046                  * if we exit the while loop with pps[lcv] still set to NULL,
 1047                  * then it means that we allocated a new busy/fake/clean page
 1048                  * ptmp in the object and we need to do I/O to fill in the data.
 1049                  */
 1050 
 1051                 /* top of "pps" while loop */
 1052                 while (pps[lcv] == NULL) {
 1053                         /* look for a resident page */
 1054                         ptmp = uvm_pagelookup(uobj, current_offset);
 1055 
 1056                         /* not resident?   allocate one now (if we can) */
 1057                         if (ptmp == NULL) {
 1058 
 1059                                 ptmp = uvm_pagealloc(uobj, current_offset,
 1060                                     NULL, 0);
 1061 
 1062                                 /* out of RAM? */
 1063                                 if (ptmp == NULL) {
 1064                                         simple_unlock(&uobj->vmobjlock);
 1065                                         UVMHIST_LOG(pdhist,
 1066                                             "sleeping, ptmp == NULL\n",0,0,0,0);
 1067                                         uvm_wait("uao_getpage");
 1068                                         simple_lock(&uobj->vmobjlock);
 1069                                         continue;
 1070                                 }
 1071 
 1072                                 /*
 1073                                  * safe with PQ's unlocked: because we just
 1074                                  * alloc'd the page
 1075                                  */
 1076 
 1077                                 ptmp->pqflags |= PQ_AOBJ;
 1078 
 1079                                 /*
 1080                                  * got new page ready for I/O.  break pps while
 1081                                  * loop.  pps[lcv] is still NULL.
 1082                                  */
 1083 
 1084                                 break;
 1085                         }
 1086 
 1087                         /* page is there, see if we need to wait on it */
 1088                         if ((ptmp->flags & PG_BUSY) != 0) {
 1089                                 ptmp->flags |= PG_WANTED;
 1090                                 UVMHIST_LOG(pdhist,
 1091                                     "sleeping, ptmp->flags 0x%x\n",
 1092                                     ptmp->flags,0,0,0);
 1093                                 UVM_UNLOCK_AND_WAIT(ptmp, &uobj->vmobjlock,
 1094                                     FALSE, "uao_get", 0);
 1095                                 simple_lock(&uobj->vmobjlock);
 1096                                 continue;
 1097                         }
 1098 
 1099                         /*
 1100                          * if we get here then the page has become resident and
 1101                          * unbusy between steps 1 and 2.  we busy it now (so we
 1102                          * own it) and set pps[lcv] (so that we exit the while
 1103                          * loop).
 1104                          */
 1105 
 1106                         /* we own it, caller must un-busy */
 1107                         ptmp->flags |= PG_BUSY;
 1108                         UVM_PAGE_OWN(ptmp, "uao_get2");
 1109                         pps[lcv] = ptmp;
 1110                 }
 1111 
 1112                 /*
 1113                  * if we own the valid page at the correct offset, pps[lcv] will
 1114                  * point to it.   nothing more to do except go to the next page.
 1115                  */
 1116 
 1117                 if (pps[lcv])
 1118                         continue;                       /* next lcv */
 1119 
 1120                 /*
 1121                  * we have a "fake/busy/clean" page that we just allocated.
 1122                  * do the needed "i/o", either reading from swap or zeroing.
 1123                  */
 1124 
 1125                 swslot = uao_find_swslot(&aobj->u_obj, pageidx);
 1126 
 1127                 /*
 1128                  * just zero the page if there's nothing in swap.
 1129                  */
 1130 
 1131                 if (swslot == 0) {
 1132 
 1133                         /*
 1134                          * page hasn't existed before, just zero it.
 1135                          */
 1136 
 1137                         uvm_pagezero(ptmp);
 1138                 } else {
 1139 #if defined(VMSWAP)
 1140                         int error;
 1141 
 1142                         UVMHIST_LOG(pdhist, "pagein from swslot %d",
 1143                              swslot, 0,0,0);
 1144 
 1145                         /*
 1146                          * page in the swapped-out page.
 1147                          * unlock object for i/o, relock when done.
 1148                          */
 1149 
 1150                         simple_unlock(&uobj->vmobjlock);
 1151                         error = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
 1152                         simple_lock(&uobj->vmobjlock);
 1153 
 1154                         /*
 1155                          * I/O done.  check for errors.
 1156                          */
 1157 
 1158                         if (error != 0) {
 1159                                 UVMHIST_LOG(pdhist, "<- done (error=%d)",
 1160                                     error,0,0,0);
 1161                                 if (ptmp->flags & PG_WANTED)
 1162                                         wakeup(ptmp);
 1163 
 1164                                 /*
 1165                                  * remove the swap slot from the aobj
 1166                                  * and mark the aobj as having no real slot.
 1167                                  * don't free the swap slot, thus preventing
 1168                                  * it from being used again.
 1169                                  */
 1170 
 1171                                 swslot = uao_set_swslot(&aobj->u_obj, pageidx,
 1172                                                         SWSLOT_BAD);
 1173                                 if (swslot > 0) {
 1174                                         uvm_swap_markbad(swslot, 1);
 1175                                 }
 1176 
 1177                                 uvm_lock_pageq();
 1178                                 uvm_pagefree(ptmp);
 1179                                 uvm_unlock_pageq();
 1180                                 simple_unlock(&uobj->vmobjlock);
 1181                                 return error;
 1182                         }
 1183 #else /* defined(VMSWAP) */
 1184                         panic("%s: pagein", __func__);
 1185 #endif /* defined(VMSWAP) */
 1186                 }
 1187 
 1188                 if ((access_type & VM_PROT_WRITE) == 0) {
 1189                         ptmp->flags |= PG_CLEAN;
 1190                         pmap_clear_modify(ptmp);
 1191                 }
 1192 
 1193                 /*
 1194                  * we got the page!   clear the fake flag (indicates valid
 1195                  * data now in page) and plug into our result array.   note
 1196                  * that page is still busy.
 1197                  *
 1198                  * it is the callers job to:
 1199                  * => check if the page is released
 1200                  * => unbusy the page
 1201                  * => activate the page
 1202                  */
 1203 
 1204                 ptmp->flags &= ~PG_FAKE;
 1205                 pps[lcv] = ptmp;
 1206         }
 1207 
 1208         /*
 1209          * finally, unlock object and return.
 1210          */
 1211 
 1212 done:
 1213         simple_unlock(&uobj->vmobjlock);
 1214         UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0);
 1215         return 0;
 1216 }
 1217 
 1218 #if defined(VMSWAP)
 1219 
 1220 /*
 1221  * uao_dropswap:  release any swap resources from this aobj page.
 1222  *
 1223  * => aobj must be locked or have a reference count of 0.
 1224  */
 1225 
 1226 void
 1227 uao_dropswap(struct uvm_object *uobj, int pageidx)
 1228 {
 1229         int slot;
 1230 
 1231         slot = uao_set_swslot(uobj, pageidx, 0);
 1232         if (slot) {
 1233                 uvm_swap_free(slot, 1);
 1234         }
 1235 }
 1236 
 1237 /*
 1238  * page in every page in every aobj that is paged-out to a range of swslots.
 1239  *
 1240  * => nothing should be locked.
 1241  * => returns TRUE if pagein was aborted due to lack of memory.
 1242  */
 1243 
 1244 boolean_t
 1245 uao_swap_off(int startslot, int endslot)
 1246 {
 1247         struct uvm_aobj *aobj, *nextaobj;
 1248         boolean_t rv;
 1249 
 1250         /*
 1251          * walk the list of all aobjs.
 1252          */
 1253 
 1254 restart:
 1255         simple_lock(&uao_list_lock);
 1256         for (aobj = LIST_FIRST(&uao_list);
 1257              aobj != NULL;
 1258              aobj = nextaobj) {
 1259 
 1260                 /*
 1261                  * try to get the object lock, start all over if we fail.
 1262                  * most of the time we'll get the aobj lock,
 1263                  * so this should be a rare case.
 1264                  */
 1265 
 1266                 if (!simple_lock_try(&aobj->u_obj.vmobjlock)) {
 1267                         simple_unlock(&uao_list_lock);
 1268                         goto restart;
 1269                 }
 1270 
 1271                 /*
 1272                  * add a ref to the aobj so it doesn't disappear
 1273                  * while we're working.
 1274                  */
 1275 
 1276                 uao_reference_locked(&aobj->u_obj);
 1277 
 1278                 /*
 1279                  * now it's safe to unlock the uao list.
 1280                  */
 1281 
 1282                 simple_unlock(&uao_list_lock);
 1283 
 1284                 /*
 1285                  * page in any pages in the swslot range.
 1286                  * if there's an error, abort and return the error.
 1287                  */
 1288 
 1289                 rv = uao_pagein(aobj, startslot, endslot);
 1290                 if (rv) {
 1291                         uao_detach_locked(&aobj->u_obj);
 1292                         return rv;
 1293                 }
 1294 
 1295                 /*
 1296                  * we're done with this aobj.
 1297                  * relock the list and drop our ref on the aobj.
 1298                  */
 1299 
 1300                 simple_lock(&uao_list_lock);
 1301                 nextaobj = LIST_NEXT(aobj, u_list);
 1302                 uao_detach_locked(&aobj->u_obj);
 1303         }
 1304 
 1305         /*
 1306          * done with traversal, unlock the list
 1307          */
 1308         simple_unlock(&uao_list_lock);
 1309         return FALSE;
 1310 }
 1311 
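      /*
       * Editorial note (assumption about the caller, not part of the
       * original source): the swapoff(2) path in uvm_swap.c hands this
       * function the drum-slot range of the device being removed, roughly:
       *
       *      if (uao_swap_off(startslot, endslot))
       *              return ENOMEM;          // a pagein ran out of memory
       */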
 1312 
 1313 /*
 1314  * page in any pages from aobj in the given range.
 1315  *
 1316  * => aobj must be locked and is returned locked.
 1317  * => returns TRUE if pagein was aborted due to lack of memory.
 1318  */
 1319 static boolean_t
 1320 uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
 1321 {
 1322         boolean_t rv;
 1323 
 1324         if (UAO_USES_SWHASH(aobj)) {
 1325                 struct uao_swhash_elt *elt;
 1326                 int buck;
 1327 
 1328 restart:
 1329                 for (buck = aobj->u_swhashmask; buck >= 0; buck--) {
 1330                         for (elt = LIST_FIRST(&aobj->u_swhash[buck]);
 1331                              elt != NULL;
 1332                              elt = LIST_NEXT(elt, list)) {
 1333                                 int i;
 1334 
 1335                                 for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
 1336                                         int slot = elt->slots[i];
 1337 
 1338                                         /*
 1339                                          * if the slot isn't in range, skip it.
 1340                                          */
 1341 
 1342                                         if (slot < startslot ||
 1343                                             slot >= endslot) {
 1344                                                 continue;
 1345                                         }
 1346 
 1347                                         /*
 1348                                          * process the page,
 1349                                          * then start over on this object
 1350                                          * since the swhash elt
 1351                                          * may have been freed.
 1352                                          */
 1353 
 1354                                         rv = uao_pagein_page(aobj,
 1355                                           UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
 1356                                         if (rv) {
 1357                                                 return rv;
 1358                                         }
 1359                                         goto restart;
 1360                                 }
 1361                         }
 1362                 }
 1363         } else {
 1364                 int i;
 1365 
 1366                 for (i = 0; i < aobj->u_pages; i++) {
 1367                         int slot = aobj->u_swslots[i];
 1368 
 1369                         /*
 1370                          * if the slot isn't in range, skip it
 1371                          */
 1372 
 1373                         if (slot < startslot || slot >= endslot) {
 1374                                 continue;
 1375                         }
 1376 
 1377                         /*
 1378                          * process the page.
 1379                          */
 1380 
 1381                         rv = uao_pagein_page(aobj, i);
 1382                         if (rv) {
 1383                                 return rv;
 1384                         }
 1385                 }
 1386         }
 1387 
 1388         return FALSE;
 1389 }
 1390 
 1391 /*
 1392  * page in a page from an aobj.  used for swap_off.
 1393  * returns TRUE if pagein was aborted due to lack of memory.
 1394  *
 1395  * => aobj must be locked and is returned locked.
 1396  */
 1397 
 1398 static boolean_t
 1399 uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
 1400 {
 1401         struct vm_page *pg;
 1402         int rv, npages;
 1403 
 1404         pg = NULL;
 1405         npages = 1;
 1406         /* locked: aobj */
 1407         rv = uao_get(&aobj->u_obj, pageidx << PAGE_SHIFT,
 1408             &pg, &npages, 0, VM_PROT_READ|VM_PROT_WRITE, 0, PGO_SYNCIO);
 1409         /* unlocked: aobj */
 1410 
 1411         /*
 1412          * relock and finish up.
 1413          */
 1414 
 1415         simple_lock(&aobj->u_obj.vmobjlock);
 1416         switch (rv) {
 1417         case 0:
 1418                 break;
 1419 
 1420         case EIO:
 1421         case ERESTART:
 1422 
 1423                 /*
 1424                  * nothing more to do on errors.
 1425                  * ERESTART can only mean that the anon was freed,
 1426                  * so again there's nothing to do.
 1427                  */
 1428 
 1429                 return FALSE;
 1430 
 1431         default:
 1432                 return TRUE;
 1433         }
 1434 
 1435         /*
 1436          * ok, we've got the page now.
 1437          * mark it as dirty, clear its swslot and un-busy it.
 1438          */
 1439         uao_dropswap(&aobj->u_obj, pageidx);
 1440 
 1441         /*
 1442          * make sure it's on a page queue.
 1443          */
 1444         uvm_lock_pageq();
 1445         if (pg->wire_count == 0)
 1446                 uvm_pageenqueue(pg);
 1447         uvm_unlock_pageq();
 1448 
 1449         if (pg->flags & PG_WANTED) {
 1450                 wakeup(pg);
 1451         }
 1452         pg->flags &= ~(PG_WANTED|PG_BUSY|PG_CLEAN|PG_FAKE);
 1453         UVM_PAGE_OWN(pg, NULL);
 1454 
 1455         return FALSE;
 1456 }
 1457 
 1458 /*
 1459  * uao_dropswap_range: drop swapslots in the range.
 1460  *
 1461  * => aobj must be locked and is returned locked.
 1462  * => start is inclusive.  end is exclusive.
 1463  */
 1464 
 1465 void
 1466 uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
 1467 {
 1468         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
 1469 
 1470         LOCK_ASSERT(simple_lock_held(&uobj->vmobjlock));
 1471 
 1472         uao_dropswap_range1(aobj, start, end);
 1473 }
 1474 
 1475 static void
 1476 uao_dropswap_range1(struct uvm_aobj *aobj, voff_t start, voff_t end)
 1477 {
 1478         int swpgonlydelta = 0;
 1479 
 1480         if (end == 0) {
 1481                 end = INT64_MAX;
 1482         }
 1483 
 1484         if (UAO_USES_SWHASH(aobj)) {
 1485                 int i, hashbuckets = aobj->u_swhashmask + 1;
 1486                 voff_t taghi;
 1487                 voff_t taglo;
 1488 
 1489                 taglo = UAO_SWHASH_ELT_TAG(start);
 1490                 taghi = UAO_SWHASH_ELT_TAG(end);
 1491 
 1492                 for (i = 0; i < hashbuckets; i++) {
 1493                         struct uao_swhash_elt *elt, *next;
 1494 
 1495                         for (elt = LIST_FIRST(&aobj->u_swhash[i]);
 1496                              elt != NULL;
 1497                              elt = next) {
 1498                                 int startidx, endidx;
 1499                                 int j;
 1500 
 1501                                 next = LIST_NEXT(elt, list);
 1502 
 1503                                 if (elt->tag < taglo || taghi < elt->tag) {
 1504                                         continue;
 1505                                 }
 1506 
 1507                                 if (elt->tag == taglo) {
 1508                                         startidx =
 1509                                             UAO_SWHASH_ELT_PAGESLOT_IDX(start);
 1510                                 } else {
 1511                                         startidx = 0;
 1512                                 }
 1513 
 1514                                 if (elt->tag == taghi) {
 1515                                         endidx =
 1516                                             UAO_SWHASH_ELT_PAGESLOT_IDX(end);
 1517                                 } else {
 1518                                         endidx = UAO_SWHASH_CLUSTER_SIZE;
 1519                                 }
 1520 
 1521                                 for (j = startidx; j < endidx; j++) {
 1522                                         int slot = elt->slots[j];
 1523 
 1524                                         KASSERT(uvm_pagelookup(&aobj->u_obj,
 1525                                             (UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
 1526                                             + j) << PAGE_SHIFT) == NULL);
 1527                                         if (slot > 0) {
 1528                                                 uvm_swap_free(slot, 1);
 1529                                                 swpgonlydelta++;
 1530                                                 KASSERT(elt->count > 0);
 1531                                                 elt->slots[j] = 0;
 1532                                                 elt->count--;
 1533                                         }
 1534                                 }
 1535 
 1536                                 if (elt->count == 0) {
 1537                                         LIST_REMOVE(elt, list);
 1538                                         pool_put(&uao_swhash_elt_pool, elt);
 1539                                 }
 1540                         }
 1541                 }
 1542         } else {
 1543                 int i;
 1544 
 1545                 if (aobj->u_pages < end) {
 1546                         end = aobj->u_pages;
 1547                 }
 1548                 for (i = start; i < end; i++) {
 1549                         int slot = aobj->u_swslots[i];
 1550 
 1551                         if (slot > 0) {
 1552                                 uvm_swap_free(slot, 1);
 1553                                 swpgonlydelta++;
 1554                         }
 1555                 }
 1556         }
 1557 
 1558         /*
 1559          * adjust the counter of pages only in swap for all
 1560          * the swap slots we've freed.
 1561          */
 1562 
 1563         if (swpgonlydelta > 0) {
 1564                 simple_lock(&uvm.swap_data_lock);
 1565                 KASSERT(uvmexp.swpgonly >= swpgonlydelta);
 1566                 uvmexp.swpgonly -= swpgonlydelta;
 1567                 simple_unlock(&uvm.swap_data_lock);
 1568         }
 1569 }
 1570 
 1571 #endif /* defined(VMSWAP) */
