FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_aobj.c


    1 /*      $NetBSD: uvm_aobj.c,v 1.104 2008/10/18 03:46:22 rmind Exp $     */
    2 
    3 /*
    4  * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
    5  *                    Washington University.
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. All advertising materials mentioning features or use of this software
   17  *    must display the following acknowledgement:
   18  *      This product includes software developed by Charles D. Cranor and
   19  *      Washington University.
   20  * 4. The name of the author may not be used to endorse or promote products
   21  *    derived from this software without specific prior written permission.
   22  *
   23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   24  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   25  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   26  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   27  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   28  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   32  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   33  *
   34  * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
   35  */
   36 /*
   37  * uvm_aobj.c: anonymous memory uvm_object pager
   38  *
   39  * author: Chuck Silvers <chuq@chuq.com>
   40  * started: Jan-1998
   41  *
   42  * - design mostly from Chuck Cranor
   43  */
   44 
   45 #include <sys/cdefs.h>
   46 __KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.104 2008/10/18 03:46:22 rmind Exp $");
   47 
   48 #include "opt_uvmhist.h"
   49 
   50 #include <sys/param.h>
   51 #include <sys/systm.h>
   52 #include <sys/proc.h>
   53 #include <sys/kernel.h>
   54 #include <sys/kmem.h>
   55 #include <sys/pool.h>
   56 
   57 #include <uvm/uvm.h>
   58 
   59 /*
   60  * an aobj manages anonymous-memory backed uvm_objects.   in addition
   61  * to keeping the list of resident pages, it also keeps a list of
   62  * allocated swap blocks.  depending on the size of the aobj this list
   63  * of allocated swap blocks is either stored in an array (small objects)
   64  * or in a hash table (large objects).
   65  */
   66 
   67 /*
   68  * local structures
   69  */
   70 
   71 /*
   72  * for hash tables, we break the address space of the aobj into blocks
   73  * of UAO_SWHASH_CLUSTER_SIZE pages.   we require the cluster size to
   74  * be a power of two.
   75  */
   76 
   77 #define UAO_SWHASH_CLUSTER_SHIFT 4
   78 #define UAO_SWHASH_CLUSTER_SIZE (1 << UAO_SWHASH_CLUSTER_SHIFT)
   79 
   80 /* get the "tag" for this page index */
   81 #define UAO_SWHASH_ELT_TAG(PAGEIDX) \
   82         ((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT)
   83 
   84 #define UAO_SWHASH_ELT_PAGESLOT_IDX(PAGEIDX) \
   85         ((PAGEIDX) & (UAO_SWHASH_CLUSTER_SIZE - 1))
   86 
   87 /* given an ELT and a page index, find the swap slot */
   88 #define UAO_SWHASH_ELT_PAGESLOT(ELT, PAGEIDX) \
   89         ((ELT)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(PAGEIDX)])
   90 
   91 /* given an ELT, return its pageidx base */
   92 #define UAO_SWHASH_ELT_PAGEIDX_BASE(ELT) \
   93         ((ELT)->tag << UAO_SWHASH_CLUSTER_SHIFT)
   94 
   95 /*
   96  * the swhash hash function
   97  */
   98 
   99 #define UAO_SWHASH_HASH(AOBJ, PAGEIDX) \
  100         (&(AOBJ)->u_swhash[(((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT) \
  101                             & (AOBJ)->u_swhashmask)])
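
      /*
       * worked example (illustrative, from the macros above): with
       * UAO_SWHASH_CLUSTER_SHIFT == 4, page index 0x123 has tag 0x12
       * (0x123 >> 4) and in-cluster slot index 0x3 (0x123 & 0xf), so its
       * swap slot lives in elt->slots[3] of the elt found in bucket
       * &u_swhash[0x12 & u_swhashmask].
       */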
  102 
  103 /*
  104  * the swhash threshold determines if we will use an array or a
  105  * hash table to store the list of allocated swap blocks.
  106  */
  107 
  108 #define UAO_SWHASH_THRESHOLD (UAO_SWHASH_CLUSTER_SIZE * 4)
  109 #define UAO_USES_SWHASH(AOBJ) \
  110         ((AOBJ)->u_pages > UAO_SWHASH_THRESHOLD)        /* use hash? */
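
      /*
       * example (illustrative): with UAO_SWHASH_CLUSTER_SIZE == 16 the
       * threshold is 64 pages, so an aobj of more than 64 pages (256kB
       * with 4kB pages) keeps its swap slots in the hash table, while a
       * smaller one uses the flat u_swslots array.
       */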
  111 
  112 /*
  113  * the number of buckets in a swhash, with an upper bound
  114  */
  115 
  116 #define UAO_SWHASH_MAXBUCKETS 256
  117 #define UAO_SWHASH_BUCKETS(AOBJ) \
  118         (MIN((AOBJ)->u_pages >> UAO_SWHASH_CLUSTER_SHIFT, \
  119              UAO_SWHASH_MAXBUCKETS))
  120 
  121 /*
  122  * uao_swhash_elt: when a hash table is being used, this structure defines
  123  * the format of an entry in the bucket list.
  124  */
  125 
  126 struct uao_swhash_elt {
  127         LIST_ENTRY(uao_swhash_elt) list;        /* the hash list */
  128         voff_t tag;                             /* our 'tag' */
  129         int count;                              /* our number of active slots */
  130         int slots[UAO_SWHASH_CLUSTER_SIZE];     /* the slots */
  131 };
  132 
  133 /*
  134  * uao_swhash: the swap hash table structure
  135  */
  136 
  137 LIST_HEAD(uao_swhash, uao_swhash_elt);
  138 
  139 /*
  140  * uao_swhash_elt_pool: pool of uao_swhash_elt structures
  141  * NOTE: Pages for this pool must not come from a pageable kernel map!
  142  */
  143 static POOL_INIT(uao_swhash_elt_pool, sizeof(struct uao_swhash_elt), 0, 0, 0,
  144     "uaoeltpl", NULL, IPL_VM);
  145 
  146 static struct pool_cache uvm_aobj_cache;
  147 
  148 /*
  149  * uvm_aobj: the actual anon-backed uvm_object
  150  *
  151  * => the uvm_object is at the top of the structure; this allows
  152  *   (struct uvm_aobj *) == (struct uvm_object *)
  153  * => only one of u_swslots and u_swhash is used in any given aobj
  154  */
  155 
  156 struct uvm_aobj {
  157         struct uvm_object u_obj; /* has: lock, pgops, memq, #pages, #refs */
  158         pgoff_t u_pages;         /* number of pages in entire object */
  159         int u_flags;             /* the flags (see uvm_aobj.h) */
  160         int *u_swslots;          /* array of offset->swapslot mappings */
  161                                  /*
  162                                   * hashtable of offset->swapslot mappings
  163                                   * (u_swhash is an array of bucket heads)
  164                                   */
  165         struct uao_swhash *u_swhash;
  166         u_long u_swhashmask;            /* mask for hashtable */
  167         LIST_ENTRY(uvm_aobj) u_list;    /* global list of aobjs */
  168 };
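
      /*
       * a minimal sketch (illustrative) of the cast that the layout
       * above permits; it is valid only because u_obj is the first
       * member:
       *
       *      struct uvm_object *uobj = uao_create(size, 0);
       *      struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
       */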
  169 
  170 /*
  171  * local functions
  172  */
  173 
  174 static void     uao_free(struct uvm_aobj *);
  175 static int      uao_get(struct uvm_object *, voff_t, struct vm_page **,
  176                     int *, int, vm_prot_t, int, int);
  177 static int      uao_put(struct uvm_object *, voff_t, voff_t, int);
  178 
  179 #if defined(VMSWAP)
  180 static struct uao_swhash_elt *uao_find_swhash_elt
  181     (struct uvm_aobj *, int, bool);
  182 
  183 static bool uao_pagein(struct uvm_aobj *, int, int);
  184 static bool uao_pagein_page(struct uvm_aobj *, int);
  185 static void uao_dropswap_range1(struct uvm_aobj *, voff_t, voff_t);
  186 #endif /* defined(VMSWAP) */
  187 
  188 /*
  189  * aobj_pager
  190  *
  191  * note that some functions (e.g. put) are handled elsewhere
  192  */
  193 
  194 const struct uvm_pagerops aobj_pager = {
  195         .pgo_reference = uao_reference,
  196         .pgo_detach = uao_detach,
  197         .pgo_get = uao_get,
  198         .pgo_put = uao_put,
  199 };
  200 
  201 /*
  202  * uao_list: global list of active aobjs, locked by uao_list_lock
  203  */
  204 
  205 static LIST_HEAD(aobjlist, uvm_aobj) uao_list;
  206 static kmutex_t uao_list_lock;
  207 
  208 /*
  209  * functions
  210  */
  211 
  212 /*
  213  * hash table/array related functions
  214  */
  215 
  216 #if defined(VMSWAP)
  217 
  218 /*
  219  * uao_find_swhash_elt: find (or create) a hash table entry for a page
  220  * offset.
  221  *
  222  * => the object should be locked by the caller
  223  */
  224 
  225 static struct uao_swhash_elt *
  226 uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, bool create)
  227 {
  228         struct uao_swhash *swhash;
  229         struct uao_swhash_elt *elt;
  230         voff_t page_tag;
  231 
  232         swhash = UAO_SWHASH_HASH(aobj, pageidx);
  233         page_tag = UAO_SWHASH_ELT_TAG(pageidx);
  234 
  235         /*
  236          * now search the bucket for the requested tag
  237          */
  238 
  239         LIST_FOREACH(elt, swhash, list) {
  240                 if (elt->tag == page_tag) {
  241                         return elt;
  242                 }
  243         }
  244         if (!create) {
  245                 return NULL;
  246         }
  247 
  248         /*
  249          * allocate a new entry for the bucket and init/insert it in
  250          */
  251 
  252         elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT);
  253         if (elt == NULL) {
  254                 return NULL;
  255         }
  256         LIST_INSERT_HEAD(swhash, elt, list);
  257         elt->tag = page_tag;
  258         elt->count = 0;
  259         memset(elt->slots, 0, sizeof(elt->slots));
  260         return elt;
  261 }
  262 
  263 /*
  264  * uao_find_swslot: find the swap slot number for an aobj/pageidx
  265  *
  266  * => object must be locked by caller
  267  */
  268 
  269 int
  270 uao_find_swslot(struct uvm_object *uobj, int pageidx)
  271 {
  272         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  273         struct uao_swhash_elt *elt;
  274 
  275         /*
  276          * if noswap flag is set, then we never return a slot
  277          */
  278 
  279         if (aobj->u_flags & UAO_FLAG_NOSWAP)
  280                 return(0);
  281 
  282         /*
  283          * if hashing, look in hash table.
  284          */
  285 
  286         if (UAO_USES_SWHASH(aobj)) {
  287                 elt = uao_find_swhash_elt(aobj, pageidx, false);
  288                 if (elt)
  289                         return(UAO_SWHASH_ELT_PAGESLOT(elt, pageidx));
  290                 else
  291                         return(0);
  292         }
  293 
  294         /*
  295          * otherwise, look in the array
  296          */
  297 
  298         return(aobj->u_swslots[pageidx]);
  299 }
  300 
  301 /*
  302  * uao_set_swslot: set the swap slot for a page in an aobj.
  303  *
  304  * => setting a slot to zero frees the slot
  305  * => object must be locked by caller
  306  * => we return the old slot number, or -1 if we failed to allocate
  307  *    memory to record the new slot number
  308  */
  309 
  310 int
  311 uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
  312 {
  313         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  314         struct uao_swhash_elt *elt;
  315         int oldslot;
  316         UVMHIST_FUNC("uao_set_swslot"); UVMHIST_CALLED(pdhist);
  317         UVMHIST_LOG(pdhist, "aobj %p pageidx %d slot %d",
  318             aobj, pageidx, slot, 0);
  319 
  320         /*
  321          * if noswap flag is set, then we can't set a non-zero slot.
  322          */
  323 
  324         if (aobj->u_flags & UAO_FLAG_NOSWAP) {
  325                 if (slot == 0)
  326                         return(0);
  327 
  328                 printf("uao_set_swslot: uobj = %p\n", uobj);
  329                 panic("uao_set_swslot: NOSWAP object");
  330         }
  331 
  332         /*
  333          * are we using a hash table?  if so, add it in the hash.
  334          */
  335 
  336         if (UAO_USES_SWHASH(aobj)) {
  337 
  338                 /*
  339                  * Avoid allocating an entry just to free it again if
  340          * the page had no swap slot in the first place, and
  341                  * we are freeing.
  342                  */
  343 
  344                 elt = uao_find_swhash_elt(aobj, pageidx, slot != 0);
  345                 if (elt == NULL) {
  346                         return slot ? -1 : 0;
  347                 }
  348 
  349                 oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
  350                 UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;
  351 
  352                 /*
  353                  * now adjust the elt's reference counter and free it if we've
  354                  * dropped it to zero.
  355                  */
  356 
  357                 if (slot) {
  358                         if (oldslot == 0)
  359                                 elt->count++;
  360                 } else {
  361                         if (oldslot)
  362                                 elt->count--;
  363 
  364                         if (elt->count == 0) {
  365                                 LIST_REMOVE(elt, list);
  366                                 pool_put(&uao_swhash_elt_pool, elt);
  367                         }
  368                 }
  369         } else {
  370                 /* we are using an array */
  371                 oldslot = aobj->u_swslots[pageidx];
  372                 aobj->u_swslots[pageidx] = slot;
  373         }
  374         return (oldslot);
  375 }
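
      /*
       * usage sketch (illustrative): a caller records a newly allocated
       * swap slot and later releases it by storing zero, mirroring what
       * uao_dropswap() does below:
       *
       *      oldslot = uao_set_swslot(uobj, pageidx, newslot);
       *      ...
       *      slot = uao_set_swslot(uobj, pageidx, 0);
       *      if (slot)
       *              uvm_swap_free(slot, 1);
       */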
  376 
  377 #endif /* defined(VMSWAP) */
  378 
  379 /*
  380  * end of hash/array functions
  381  */
  382 
  383 /*
  384  * uao_free: free all resources held by an aobj, and then free the aobj
  385  *
  386  * => the aobj should be dead
  387  */
  388 
  389 static void
  390 uao_free(struct uvm_aobj *aobj)
  391 {
  392         int swpgonlydelta = 0;
  393 
  394 
  395 #if defined(VMSWAP)
  396         uao_dropswap_range1(aobj, 0, 0);
  397 #endif /* defined(VMSWAP) */
  398 
  399         mutex_exit(&aobj->u_obj.vmobjlock);
  400 
  401 #if defined(VMSWAP)
  402         if (UAO_USES_SWHASH(aobj)) {
  403 
  404                 /*
  405                  * free the hash table itself.
  406                  */
  407 
  408                 hashdone(aobj->u_swhash, HASH_LIST, aobj->u_swhashmask);
  409         } else {
  410 
  411                 /*
  412          * free the array itself.
  413                  */
  414 
  415                 kmem_free(aobj->u_swslots, aobj->u_pages * sizeof(int));
  416         }
  417 #endif /* defined(VMSWAP) */
  418 
  419         /*
  420          * finally free the aobj itself
  421          */
  422 
  423         UVM_OBJ_DESTROY(&aobj->u_obj);
  424         pool_cache_put(&uvm_aobj_cache, aobj);
  425 
  426         /*
  427          * adjust the counter of pages only in swap for all
  428          * the swap slots we've freed.
  429          */
  430 
  431         if (swpgonlydelta > 0) {
  432                 mutex_enter(&uvm_swap_data_lock);
  433                 KASSERT(uvmexp.swpgonly >= swpgonlydelta);
  434                 uvmexp.swpgonly -= swpgonlydelta;
  435                 mutex_exit(&uvm_swap_data_lock);
  436         }
  437 }
  438 
  439 /*
  440  * pager functions
  441  */
  442 
  443 /*
  444  * uao_create: create an aobj of the given size and return its uvm_object.
  445  *
  446  * => for normal use, flags are always zero
  447  * => for the kernel object, the flags are:
  448  *      UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
  449  *      UAO_FLAG_KERNSWAP - enable swapping of kernel object (can only happen once)
  450  */
  451 
  452 struct uvm_object *
  453 uao_create(vsize_t size, int flags)
  454 {
  455         static struct uvm_aobj kernel_object_store;
  456         static int kobj_alloced = 0;
  457         pgoff_t pages = round_page(size) >> PAGE_SHIFT;
  458         struct uvm_aobj *aobj;
  459         int refs;
  460 
  461         /*
  462          * malloc a new aobj unless we are asked for the kernel object
  463          */
  464 
  465         if (flags & UAO_FLAG_KERNOBJ) {
  466                 KASSERT(!kobj_alloced);
  467                 aobj = &kernel_object_store;
  468                 aobj->u_pages = pages;
  469                 aobj->u_flags = UAO_FLAG_NOSWAP;
  470                 refs = UVM_OBJ_KERN;
  471                 kobj_alloced = UAO_FLAG_KERNOBJ;
  472         } else if (flags & UAO_FLAG_KERNSWAP) {
  473                 KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
  474                 aobj = &kernel_object_store;
  475                 kobj_alloced = UAO_FLAG_KERNSWAP;
  476                 refs = 0xdeadbeaf; /* XXX: gcc */
  477         } else {
  478                 aobj = pool_cache_get(&uvm_aobj_cache, PR_WAITOK);
  479                 aobj->u_pages = pages;
  480                 aobj->u_flags = 0;
  481                 refs = 1;
  482         }
  483 
  484         /*
  485          * allocate hash/array if necessary
  486          *
  487  * note: in the KERNSWAP case there is no need to worry about locking;
  488  * since we are still booting, we should be the only thread around.
  489          */
  490 
  491         if (flags == 0 || (flags & UAO_FLAG_KERNSWAP) != 0) {
  492 #if defined(VMSWAP)
  493                 const int kernswap = (flags & UAO_FLAG_KERNSWAP) != 0;
  494 
  495                 /* allocate hash table or array depending on object size */
  496                 if (UAO_USES_SWHASH(aobj)) {
  497                         aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(aobj),
  498                             HASH_LIST, kernswap ? false : true,
  499                             &aobj->u_swhashmask);
  500                         if (aobj->u_swhash == NULL)
  501                                 panic("uao_create: hashinit swhash failed");
  502                 } else {
  503                         aobj->u_swslots = kmem_zalloc(pages * sizeof(int),
  504                             kernswap ? KM_NOSLEEP : KM_SLEEP);
  505                         if (aobj->u_swslots == NULL)
  506                                 panic("uao_create: malloc swslots failed");
  507                 }
  508 #endif /* defined(VMSWAP) */
  509 
  510                 if (flags) {
  511                         aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
  512                         return(&aobj->u_obj);
  513                 }
  514         }
  515 
  516         /*
  517          * init aobj fields
  518          */
  519 
  520         UVM_OBJ_INIT(&aobj->u_obj, &aobj_pager, refs);
  521 
  522         /*
  523          * now that aobj is ready, add it to the global list
  524          */
  525 
  526         mutex_enter(&uao_list_lock);
  527         LIST_INSERT_HEAD(&uao_list, aobj, u_list);
  528         mutex_exit(&uao_list_lock);
  529         return(&aobj->u_obj);
  530 }
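
      /*
       * usage sketch (illustrative): ordinary anonymous memory is backed
       * by an aobj created with zero flags and released by dropping the
       * initial reference:
       *
       *      struct uvm_object *uobj = uao_create(npages << PAGE_SHIFT, 0);
       *      ...
       *      uao_detach(uobj);
       */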
  531 
  532 
  533 
  534 /*
  535  * uao_init: set up aobj pager subsystem
  536  *
  537  * => called at boot time from uvm_pager_init()
  538  */
  539 
  540 void
  541 uao_init(void)
  542 {
  543         static int uao_initialized;
  544 
  545         if (uao_initialized)
  546                 return;
  547         uao_initialized = true;
  548         LIST_INIT(&uao_list);
  549         mutex_init(&uao_list_lock, MUTEX_DEFAULT, IPL_NONE);
  550         pool_cache_bootstrap(&uvm_aobj_cache, sizeof(struct uvm_aobj), 0, 0,
  551             0, "aobj", NULL, IPL_NONE, NULL, NULL, NULL);
  552 }
  553 
  554 /*
  555  * uao_reference: add a ref to an aobj
  556  *
  557  * => aobj must be unlocked
  558  * => just lock it and call the locked version
  559  */
  560 
  561 void
  562 uao_reference(struct uvm_object *uobj)
  563 {
  564 
  565         /*
  566          * kernel_object already has plenty of references, leave it alone.
  567          */
  568 
  569         if (UVM_OBJ_IS_KERN_OBJECT(uobj))
  570                 return;
  571 
  572         mutex_enter(&uobj->vmobjlock);
  573         uao_reference_locked(uobj);
  574         mutex_exit(&uobj->vmobjlock);
  575 }
  576 
  577 /*
  578  * uao_reference_locked: add a ref to an aobj that is already locked
  579  *
  580  * => aobj must be locked
  581  * this needs to be separate from the normal routine
  582  * since sometimes we need to add a reference to an aobj when
  583  * it's already locked.
  584  */
  585 
  586 void
  587 uao_reference_locked(struct uvm_object *uobj)
  588 {
  589         UVMHIST_FUNC("uao_reference"); UVMHIST_CALLED(maphist);
  590 
  591         /*
  592          * kernel_object already has plenty of references, leave it alone.
  593          */
  594 
  595         if (UVM_OBJ_IS_KERN_OBJECT(uobj))
  596                 return;
  597 
  598         uobj->uo_refs++;
  599         UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)",
  600                     uobj, uobj->uo_refs,0,0);
  601 }
  602 
  603 /*
  604  * uao_detach: drop a reference to an aobj
  605  *
  606  * => aobj must be unlocked
  607  * => just lock it and call the locked version
  608  */
  609 
  610 void
  611 uao_detach(struct uvm_object *uobj)
  612 {
  613 
  614         /*
  615          * detaching from kernel_object is a noop.
  616          */
  617 
  618         if (UVM_OBJ_IS_KERN_OBJECT(uobj))
  619                 return;
  620 
  621         mutex_enter(&uobj->vmobjlock);
  622         uao_detach_locked(uobj);
  623 }
  624 
  625 /*
  626  * uao_detach_locked: drop a reference to an aobj
  627  *
  628  * => aobj must be locked, and is unlocked (or freed) upon return.
  629  * this needs to be separate from the normal routine
  630  * since sometimes we need to detach from an aobj when
  631  * it's already locked.
  632  */
  633 
  634 void
  635 uao_detach_locked(struct uvm_object *uobj)
  636 {
  637         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  638         struct vm_page *pg;
  639         UVMHIST_FUNC("uao_detach"); UVMHIST_CALLED(maphist);
  640 
  641         /*
  642          * detaching from kernel_object is a noop.
  643          */
  644 
  645         if (UVM_OBJ_IS_KERN_OBJECT(uobj)) {
  646                 mutex_exit(&uobj->vmobjlock);
  647                 return;
  648         }
  649 
  650         UVMHIST_LOG(maphist,"  (uobj=0x%x)  ref=%d", uobj,uobj->uo_refs,0,0);
  651         uobj->uo_refs--;
  652         if (uobj->uo_refs) {
  653                 mutex_exit(&uobj->vmobjlock);
  654                 UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
  655                 return;
  656         }
  657 
  658         /*
  659          * remove the aobj from the global list.
  660          */
  661 
  662         mutex_enter(&uao_list_lock);
  663         LIST_REMOVE(aobj, u_list);
  664         mutex_exit(&uao_list_lock);
  665 
  666         /*
  667          * free all the pages left in the aobj.  for each page,
  668          * when the page is no longer busy (and thus after any disk i/o that
  669          * it's involved in is complete), release any swap resources and
  670          * free the page itself.
  671          */
  672 
  673         mutex_enter(&uvm_pageqlock);
  674         while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL) {
  675                 pmap_page_protect(pg, VM_PROT_NONE);
  676                 if (pg->flags & PG_BUSY) {
  677                         pg->flags |= PG_WANTED;
  678                         mutex_exit(&uvm_pageqlock);
  679                         UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, false,
  680                             "uao_det", 0);
  681                         mutex_enter(&uobj->vmobjlock);
  682                         mutex_enter(&uvm_pageqlock);
  683                         continue;
  684                 }
  685                 uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
  686                 uvm_pagefree(pg);
  687         }
  688         mutex_exit(&uvm_pageqlock);
  689 
  690         /*
  691          * finally, free the aobj itself.
  692          */
  693 
  694         uao_free(aobj);
  695 }
  696 
  697 /*
  698  * uao_put: flush pages out of a uvm object
  699  *
  700  * => object should be locked by caller.  we may _unlock_ the object
  701  *      if (and only if) we need to clean a page (PGO_CLEANIT).
  702  *      XXXJRT Currently, however, we don't.  In the case of cleaning
  703  *      XXXJRT a page, we simply just deactivate it.  Should probably
  704  *      XXXJRT handle this better, in the future (although "flushing"
  705  *      XXXJRT anonymous memory isn't terribly important).
  706  * => if PGO_CLEANIT is not set, then we will neither unlock the object
  707  *      nor block.
  708  * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
  709  *      for flushing.
  710  * => NOTE: we rely on the fact that the object's memq is a TAILQ and
  711  *      that new pages are inserted on the tail end of the list.  thus,
  712  *      we can make a complete pass through the object in one go by starting
  713  *      at the head and working towards the tail (new pages are put in
  714  *      front of us).
  715  * => NOTE: we are allowed to lock the page queues, so the caller
  716  *      must not be holding the lock on them [e.g. pagedaemon had
  717  *      better not call us with the queues locked]
  718  * => we return 0 unless we encountered some sort of I/O error
  719  *      XXXJRT currently never happens, as we never directly initiate
  720  *      XXXJRT I/O
  721  *
  722  * note on page traversal:
  723  *      we can traverse the pages in an object either by going down the
  724  *      linked list in "uobj->memq", or we can go over the address range
  725  *      by page doing hash table lookups for each address.  depending
  726  *      on how many pages are in the object it may be cheaper to do one
  727  *      or the other.  we set "by_list" to true if we are using memq.
  728  *      if the cost of a hash lookup was equal to the cost of the list
  729  *      traversal we could compare the number of pages in the start->stop
  730  *      range to the total number of pages in the object.  however, it
  731  *      seems that a hash table lookup is more expensive than the linked
  732  *      list traversal, so we multiply the number of pages in the
  733  *      start->stop range by a penalty which we define below.
  734  */
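
      /*
       * worked example of the by_list heuristic (illustrative, assuming
       * UVM_PAGE_HASH_PENALTY is 4): flushing a 16-page range of an
       * object with 100 resident pages takes the per-offset lookup path
       * (100 > 16 * 4), while the same range of an object with 50
       * resident pages walks memq instead (50 <= 64).
       */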
  735 
  736 static int
  737 uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
  738 {
  739         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  740         struct vm_page *pg, *nextpg, curmp, endmp;
  741         bool by_list;
  742         voff_t curoff;
  743         UVMHIST_FUNC("uao_put"); UVMHIST_CALLED(maphist);
  744 
  745         KASSERT(mutex_owned(&uobj->vmobjlock));
  746 
  747         curoff = 0;
  748         if (flags & PGO_ALLPAGES) {
  749                 start = 0;
  750                 stop = aobj->u_pages << PAGE_SHIFT;
  751                 by_list = true;         /* always go by the list */
  752         } else {
  753                 start = trunc_page(start);
  754                 if (stop == 0) {
  755                         stop = aobj->u_pages << PAGE_SHIFT;
  756                 } else {
  757                         stop = round_page(stop);
  758                 }
  759                 if (stop > (aobj->u_pages << PAGE_SHIFT)) {
  760                         printf("uao_put: strange, got an out of range "
  761                             "flush (fixed)\n");
  762                         stop = aobj->u_pages << PAGE_SHIFT;
  763                 }
  764                 by_list = (uobj->uo_npages <=
  765                     ((stop - start) >> PAGE_SHIFT) * UVM_PAGE_HASH_PENALTY);
  766         }
  767         UVMHIST_LOG(maphist,
  768             " flush start=0x%lx, stop=0x%x, by_list=%d, flags=0x%x",
  769             start, stop, by_list, flags);
  770 
  771         /*
  772          * Don't need to do any work here if we're not freeing
  773          * or deactivating pages.
  774          */
  775 
  776         if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
  777                 mutex_exit(&uobj->vmobjlock);
  778                 return 0;
  779         }
  780 
  781         /*
  782          * Initialize the marker pages.  See the comment in
  783          * genfs_putpages() also.
  784          */
  785 
  786         curmp.uobject = uobj;
  787         curmp.offset = (voff_t)-1;
  788         curmp.flags = PG_BUSY;
  789         endmp.uobject = uobj;
  790         endmp.offset = (voff_t)-1;
  791         endmp.flags = PG_BUSY;
  792 
  793         /*
  794  * now do it.  note: we must update nextpg in the body of the loop or we
  795          * will get stuck.  we need to use nextpg if we'll traverse the list
  796          * because we may free "pg" before doing the next loop.
  797          */
  798 
  799         if (by_list) {
  800                 TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq.queue);
  801                 nextpg = TAILQ_FIRST(&uobj->memq);
  802                 uvm_lwp_hold(curlwp);
  803         } else {
  804                 curoff = start;
  805                 nextpg = NULL;  /* Quell compiler warning */
  806         }
  807 
  808         /* locked: uobj */
  809         for (;;) {
  810                 if (by_list) {
  811                         pg = nextpg;
  812                         if (pg == &endmp)
  813                                 break;
  814                         nextpg = TAILQ_NEXT(pg, listq.queue);
  815                         if (pg->offset < start || pg->offset >= stop)
  816                                 continue;
  817                 } else {
  818                         if (curoff < stop) {
  819                                 pg = uvm_pagelookup(uobj, curoff);
  820                                 curoff += PAGE_SIZE;
  821                         } else
  822                                 break;
  823                         if (pg == NULL)
  824                                 continue;
  825                 }
  826 
  827                 /*
  828                  * wait and try again if the page is busy.
  829                  */
  830 
  831                 if (pg->flags & PG_BUSY) {
  832                         if (by_list) {
  833                                 TAILQ_INSERT_BEFORE(pg, &curmp, listq.queue);
  834                         }
  835                         pg->flags |= PG_WANTED;
  836                         UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
  837                             "uao_put", 0);
  838                         mutex_enter(&uobj->vmobjlock);
  839                         if (by_list) {
  840                                 nextpg = TAILQ_NEXT(&curmp, listq.queue);
  841                                 TAILQ_REMOVE(&uobj->memq, &curmp,
  842                                     listq.queue);
  843                         } else
  844                                 curoff -= PAGE_SIZE;
  845                         continue;
  846                 }
  847 
  848                 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
  849 
  850                 /*
  851                  * XXX In these first 3 cases, we always just
  852                  * XXX deactivate the page.  We may want to
  853                  * XXX handle the different cases more specifically
  854                  * XXX in the future.
  855                  */
  856 
  857                 case PGO_CLEANIT|PGO_FREE:
  858                 case PGO_CLEANIT|PGO_DEACTIVATE:
  859                 case PGO_DEACTIVATE:
  860  deactivate_it:
  861                         mutex_enter(&uvm_pageqlock);
  862                         /* skip the page if it's wired */
  863                         if (pg->wire_count == 0) {
  864                                 uvm_pagedeactivate(pg);
  865                         }
  866                         mutex_exit(&uvm_pageqlock);
  867                         break;
  868 
  869                 case PGO_FREE:
  870                         /*
  871                          * If there are multiple references to
  872                          * the object, just deactivate the page.
  873                          */
  874 
  875                         if (uobj->uo_refs > 1)
  876                                 goto deactivate_it;
  877 
  878                         /*
  879                          * free the swap slot and the page.
  880                          */
  881 
  882                         pmap_page_protect(pg, VM_PROT_NONE);
  883 
  884                         /*
  885                          * freeing swapslot here is not strictly necessary.
  886                          * however, leaving it here doesn't save much
  887                          * because we need to update swap accounting anyway.
  888                          */
  889 
  890                         uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
  891                         mutex_enter(&uvm_pageqlock);
  892                         uvm_pagefree(pg);
  893                         mutex_exit(&uvm_pageqlock);
  894                         break;
  895 
  896                 default:
  897                         panic("%s: impossible", __func__);
  898                 }
  899         }
  900         if (by_list) {
  901                 TAILQ_REMOVE(&uobj->memq, &endmp, listq.queue);
  902                 uvm_lwp_rele(curlwp);
  903         }
  904         mutex_exit(&uobj->vmobjlock);
  905         return 0;
  906 }
  907 
  908 /*
  909  * uao_get: fetch me a page
  910  *
  911  * we have three cases:
  912  * 1: page is resident     -> just return the page.
  913  * 2: page is zero-fill    -> allocate a new page and zero it.
  914  * 3: page is swapped out  -> fetch the page from swap.
  915  *
  916  * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot.
  917  * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
  918  * then we will need to return EBUSY.
  919  *
  920  * => prefer map unlocked (not required)
  921  * => object must be locked!  we will _unlock_ it before starting any I/O.
  922  * => flags: PGO_ALLPAGES: get all of the pages
  923  *           PGO_LOCKED: fault data structures are locked
  924  * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
  925  * => NOTE: caller must check for released pages!!
  926  */
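
      /*
       * illustrative call pattern (an assumption about callers, not part
       * of this file): the fault path first calls uao_get() with
       * PGO_LOCKED to pick up resident pages cheaply, and on EBUSY
       * retries the missing center page with PGO_SYNCIO so that case 3
       * can do real I/O.
       */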
  927 
  928 static int
  929 uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
  930     int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
  931 {
  932 #if defined(VMSWAP)
  933         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  934 #endif /* defined(VMSWAP) */
  935         voff_t current_offset;
  936         struct vm_page *ptmp = NULL;    /* Quell compiler warning */
  937         int lcv, gotpages, maxpages, swslot, pageidx;
  938         bool done;
  939         UVMHIST_FUNC("uao_get"); UVMHIST_CALLED(pdhist);
  940 
  941         UVMHIST_LOG(pdhist, "aobj=%p offset=%d, flags=%d",
  942                     (struct uvm_aobj *)uobj, offset, flags,0);
  943 
  944         /*
  945          * get number of pages
  946          */
  947 
  948         maxpages = *npagesp;
  949 
  950         /*
  951          * step 1: handle the case where fault data structures are locked.
  952          */
  953 
  954         if (flags & PGO_LOCKED) {
  955 
  956                 /*
  957                  * step 1a: get pages that are already resident.   only do
  958                  * this if the data structures are locked (i.e. the first
  959                  * time through).
  960                  */
  961 
  962                 done = true;    /* be optimistic */
  963                 gotpages = 0;   /* # of pages we got so far */
  964                 for (lcv = 0, current_offset = offset ; lcv < maxpages ;
  965                     lcv++, current_offset += PAGE_SIZE) {
  966                         /* do we care about this page?  if not, skip it */
  967                         if (pps[lcv] == PGO_DONTCARE)
  968                                 continue;
  969                         ptmp = uvm_pagelookup(uobj, current_offset);
  970 
  971                         /*
  972                          * if page is new, attempt to allocate the page,
  973                          * zero-fill'd.
  974                          */
  975 
  976                         if (ptmp == NULL && uao_find_swslot(&aobj->u_obj,
  977                             current_offset >> PAGE_SHIFT) == 0) {
  978                                 ptmp = uvm_pagealloc(uobj, current_offset,
  979                                     NULL, UVM_PGA_ZERO);
  980                                 if (ptmp) {
  981                                         /* new page */
  982                                         ptmp->flags &= ~(PG_FAKE);
  983                                         ptmp->pqflags |= PQ_AOBJ;
  984                                         goto gotpage;
  985                                 }
  986                         }
  987 
  988                         /*
  989                          * to be useful we must get a non-busy page
  990                          */
  991 
  992                         if (ptmp == NULL || (ptmp->flags & PG_BUSY) != 0) {
  993                                 if (lcv == centeridx ||
  994                                     (flags & PGO_ALLPAGES) != 0)
  995                                         /* need to do a wait or I/O! */
  996                                         done = false;
  997                                 continue;
  998                         }
  999 
 1000                         /*
 1001                          * useful page: busy/lock it and plug it in our
 1002                          * result array
 1003                          */
 1004 
 1005                         /* caller must un-busy this page */
 1006                         ptmp->flags |= PG_BUSY;
 1007                         UVM_PAGE_OWN(ptmp, "uao_get1");
 1008 gotpage:
 1009                         pps[lcv] = ptmp;
 1010                         gotpages++;
 1011                 }
 1012 
 1013                 /*
 1014          * step 1b: now we've either done everything needed or we need
 1015                  * to unlock and do some waiting or I/O.
 1016                  */
 1017 
 1018                 UVMHIST_LOG(pdhist, "<- done (done=%d)", done, 0,0,0);
 1019                 *npagesp = gotpages;
 1020                 if (done)
 1021                         return 0;
 1022                 else
 1023                         return EBUSY;
 1024         }
 1025 
 1026         /*
 1027          * step 2: get non-resident or busy pages.
 1028          * object is locked.   data structures are unlocked.
 1029          */
 1030 
 1031         if ((flags & PGO_SYNCIO) == 0) {
 1032                 goto done;
 1033         }
 1034 
 1035         for (lcv = 0, current_offset = offset ; lcv < maxpages ;
 1036             lcv++, current_offset += PAGE_SIZE) {
 1037 
 1038                 /*
 1039                  * - skip over pages we've already gotten or don't want
 1040                  * - skip over pages we don't _have_ to get
 1041                  */
 1042 
 1043                 if (pps[lcv] != NULL ||
 1044                     (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
 1045                         continue;
 1046 
 1047                 pageidx = current_offset >> PAGE_SHIFT;
 1048 
 1049                 /*
 1050                  * we have yet to locate the current page (pps[lcv]).   we
 1051                  * first look for a page that is already at the current offset.
 1052                  * if we find a page, we check to see if it is busy or
 1053                  * released.  if that is the case, then we sleep on the page
 1054                  * until it is no longer busy or released and repeat the lookup.
 1055                  * if the page we found is neither busy nor released, then we
 1056                  * busy it (so we own it) and plug it into pps[lcv].   this
 1057                  * 'break's the following while loop and indicates we are
 1058                  * ready to move on to the next page in the "lcv" loop above.
 1059                  *
 1060                  * if we exit the while loop with pps[lcv] still set to NULL,
 1061                  * then it means that we allocated a new busy/fake/clean page
 1062                  * ptmp in the object and we need to do I/O to fill in the data.
 1063                  */
 1064 
 1065                 /* top of "pps" while loop */
 1066                 while (pps[lcv] == NULL) {
 1067                         /* look for a resident page */
 1068                         ptmp = uvm_pagelookup(uobj, current_offset);
 1069 
 1070                         /* not resident?   allocate one now (if we can) */
 1071                         if (ptmp == NULL) {
 1072 
 1073                                 ptmp = uvm_pagealloc(uobj, current_offset,
 1074                                     NULL, 0);
 1075 
 1076                                 /* out of RAM? */
 1077                                 if (ptmp == NULL) {
 1078                                         mutex_exit(&uobj->vmobjlock);
 1079                                         UVMHIST_LOG(pdhist,
 1080                                             "sleeping, ptmp == NULL\n",0,0,0,0);
 1081                                         uvm_wait("uao_getpage");
 1082                                         mutex_enter(&uobj->vmobjlock);
 1083                                         continue;
 1084                                 }
 1085 
 1086                                 /*
 1087                                  * safe with PQ's unlocked: because we just
 1088                                  * alloc'd the page
 1089                                  */
 1090 
 1091                                 ptmp->pqflags |= PQ_AOBJ;
 1092 
 1093                                 /*
 1094                                  * got new page ready for I/O.  break pps while
 1095                                  * loop.  pps[lcv] is still NULL.
 1096                                  */
 1097 
 1098                                 break;
 1099                         }
 1100 
 1101                         /* page is there, see if we need to wait on it */
 1102                         if ((ptmp->flags & PG_BUSY) != 0) {
 1103                                 ptmp->flags |= PG_WANTED;
 1104                                 UVMHIST_LOG(pdhist,
 1105                                     "sleeping, ptmp->flags 0x%x\n",
 1106                                     ptmp->flags,0,0,0);
 1107                                 UVM_UNLOCK_AND_WAIT(ptmp, &uobj->vmobjlock,
 1108                                     false, "uao_get", 0);
 1109                                 mutex_enter(&uobj->vmobjlock);
 1110                                 continue;
 1111                         }
 1112 
 1113                         /*
 1114                          * if we get here then the page has become resident and
 1115                          * unbusy between steps 1 and 2.  we busy it now (so we
 1116                          * own it) and set pps[lcv] (so that we exit the while
 1117                          * loop).
 1118                          */
 1119 
 1120                         /* we own it, caller must un-busy */
 1121                         ptmp->flags |= PG_BUSY;
 1122                         UVM_PAGE_OWN(ptmp, "uao_get2");
 1123                         pps[lcv] = ptmp;
 1124                 }
 1125 
 1126                 /*
 1127                  * if we own the valid page at the correct offset, pps[lcv] will
 1128                  * point to it.   nothing more to do except go to the next page.
 1129                  */
 1130 
 1131                 if (pps[lcv])
 1132                         continue;                       /* next lcv */
 1133 
 1134                 /*
 1135                  * we have a "fake/busy/clean" page that we just allocated.
 1136                  * do the needed "i/o", either reading from swap or zeroing.
 1137                  */
 1138 
 1139                 swslot = uao_find_swslot(&aobj->u_obj, pageidx);
 1140 
 1141                 /*
 1142                  * just zero the page if there's nothing in swap.
 1143                  */
 1144 
 1145                 if (swslot == 0) {
 1146 
 1147                         /*
 1148                          * page hasn't existed before, just zero it.
 1149                          */
 1150 
 1151                         uvm_pagezero(ptmp);
 1152                 } else {
 1153 #if defined(VMSWAP)
 1154                         int error;
 1155 
 1156                         UVMHIST_LOG(pdhist, "pagein from swslot %d",
 1157                              swslot, 0,0,0);
 1158 
 1159                         /*
 1160                          * page in the swapped-out page.
 1161                          * unlock object for i/o, relock when done.
 1162                          */
 1163 
 1164                         mutex_exit(&uobj->vmobjlock);
 1165                         error = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
 1166                         mutex_enter(&uobj->vmobjlock);
 1167 
 1168                         /*
 1169                          * I/O done.  check for errors.
 1170                          */
 1171 
 1172                         if (error != 0) {
 1173                                 UVMHIST_LOG(pdhist, "<- done (error=%d)",
 1174                                     error,0,0,0);
 1175                                 if (ptmp->flags & PG_WANTED)
 1176                                         wakeup(ptmp);
 1177 
 1178                                 /*
 1179                                  * remove the swap slot from the aobj
 1180                                  * and mark the aobj as having no real slot.
 1181                                  * don't free the swap slot, thus preventing
 1182                                  * it from being used again.
 1183                                  */
 1184 
 1185                                 swslot = uao_set_swslot(&aobj->u_obj, pageidx,
 1186                                                         SWSLOT_BAD);
 1187                                 if (swslot > 0) {
 1188                                         uvm_swap_markbad(swslot, 1);
 1189                                 }
 1190 
 1191                                 mutex_enter(&uvm_pageqlock);
 1192                                 uvm_pagefree(ptmp);
 1193                                 mutex_exit(&uvm_pageqlock);
 1194                                 mutex_exit(&uobj->vmobjlock);
 1195                                 return error;
 1196                         }
 1197 #else /* defined(VMSWAP) */
 1198                         panic("%s: pagein", __func__);
 1199 #endif /* defined(VMSWAP) */
 1200                 }
 1201 
 1202                 if ((access_type & VM_PROT_WRITE) == 0) {
 1203                         ptmp->flags |= PG_CLEAN;
 1204                         pmap_clear_modify(ptmp);
 1205                 }
 1206 
 1207                 /*
 1208                  * we got the page!   clear the fake flag (indicates valid
 1209                  * data now in page) and plug into our result array.   note
 1210                  * that page is still busy.
 1211                  *
 1212          * it is the caller's job to:
 1213                  * => check if the page is released
 1214                  * => unbusy the page
 1215                  * => activate the page
 1216                  */
 1217 
 1218                 ptmp->flags &= ~PG_FAKE;
 1219                 pps[lcv] = ptmp;
 1220         }
 1221 
 1222         /*
 1223          * finally, unlock object and return.
 1224          */
 1225 
 1226 done:
 1227         mutex_exit(&uobj->vmobjlock);
 1228         UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0);
 1229         return 0;
 1230 }
 1231 
 1232 #if defined(VMSWAP)
 1233 
 1234 /*
 1235  * uao_dropswap:  release any swap resources from this aobj page.
 1236  *
 1237  * => aobj must be locked or have a reference count of 0.
 1238  */
 1239 
 1240 void
 1241 uao_dropswap(struct uvm_object *uobj, int pageidx)
 1242 {
 1243         int slot;
 1244 
 1245         slot = uao_set_swslot(uobj, pageidx, 0);
 1246         if (slot) {
 1247                 uvm_swap_free(slot, 1);
 1248         }
 1249 }
 1250 
 1251 /*
 1252  * page in every page in every aobj that is paged-out to a range of swslots.
 1253  *
 1254  * => nothing should be locked.
 1255  * => returns true if pagein was aborted due to lack of memory.
 1256  */
 1257 
 1258 bool
 1259 uao_swap_off(int startslot, int endslot)
 1260 {
 1261         struct uvm_aobj *aobj, *nextaobj;
 1262         bool rv;
 1263 
 1264         /*
 1265          * walk the list of all aobjs.
 1266          */
 1267 
 1268 restart:
 1269         mutex_enter(&uao_list_lock);
 1270         for (aobj = LIST_FIRST(&uao_list);
 1271              aobj != NULL;
 1272              aobj = nextaobj) {
 1273 
 1274                 /*
 1275                  * try to get the object lock, start all over if we fail.
 1276                  * most of the time we'll get the aobj lock,
 1277                  * so this should be a rare case.
 1278                  */
 1279 
 1280                 if (!mutex_tryenter(&aobj->u_obj.vmobjlock)) {
 1281                         mutex_exit(&uao_list_lock);
 1282                         /* XXX Better than yielding but inadequate. */
 1283                         kpause("livelock", false, 1, NULL);
 1284                         goto restart;
 1285                 }
 1286 
 1287                 /*
 1288                  * add a ref to the aobj so it doesn't disappear
 1289                  * while we're working.
 1290                  */
 1291 
 1292                 uao_reference_locked(&aobj->u_obj);
 1293 
 1294                 /*
 1295                  * now it's safe to unlock the uao list.
 1296                  */
 1297 
 1298                 mutex_exit(&uao_list_lock);
 1299 
 1300                 /*
 1301                  * page in any pages in the swslot range.
 1302                  * if there's an error, abort and return the error.
 1303                  */
 1304 
 1305                 rv = uao_pagein(aobj, startslot, endslot);
 1306                 if (rv) {
 1307                         uao_detach_locked(&aobj->u_obj);
 1308                         return rv;
 1309                 }
 1310 
 1311                 /*
 1312                  * we're done with this aobj.
 1313                  * relock the list and drop our ref on the aobj.
 1314                  */
 1315 
 1316                 mutex_enter(&uao_list_lock);
 1317                 nextaobj = LIST_NEXT(aobj, u_list);
 1318                 uao_detach_locked(&aobj->u_obj);
 1319         }
 1320 
 1321         /*
 1322          * done with traversal, unlock the list
 1323          */
 1324         mutex_exit(&uao_list_lock);
 1325         return false;
 1326 }
 1327 
 1328 
 1329 /*
 1330  * page in any pages from aobj in the given range.
 1331  *
 1332  * => aobj must be locked and is returned locked.
 1333  * => returns true if pagein was aborted due to lack of memory.
 1334  */
 1335 static bool
 1336 uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
 1337 {
 1338         bool rv;
 1339 
 1340         if (UAO_USES_SWHASH(aobj)) {
 1341                 struct uao_swhash_elt *elt;
 1342                 int buck;
 1343 
 1344 restart:
 1345                 for (buck = aobj->u_swhashmask; buck >= 0; buck--) {
 1346                         for (elt = LIST_FIRST(&aobj->u_swhash[buck]);
 1347                              elt != NULL;
 1348                              elt = LIST_NEXT(elt, list)) {
 1349                                 int i;
 1350 
 1351                                 for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
 1352                                         int slot = elt->slots[i];
 1353 
 1354                                         /*
 1355                                          * if the slot isn't in range, skip it.
 1356                                          */
 1357 
 1358                                         if (slot < startslot ||
 1359                                             slot >= endslot) {
 1360                                                 continue;
 1361                                         }
 1362 
 1363                                         /*
 1364                                          * process the page,
 1365                                          * then start over on this object,
 1366                                          * since the swhash elt
 1367                                          * may have been freed.
 1368                                          */
 1369 
 1370                                         rv = uao_pagein_page(aobj,
 1371                                           UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
 1372                                         if (rv) {
 1373                                                 return rv;
 1374                                         }
 1375                                         goto restart;
 1376                                 }
 1377                         }
 1378                 }
 1379         } else {
 1380                 int i;
 1381 
 1382                 for (i = 0; i < aobj->u_pages; i++) {
 1383                         int slot = aobj->u_swslots[i];
 1384 
 1385                         /*
 1386                          * if the slot isn't in range, skip it
 1387                          */
 1388 
 1389                         if (slot < startslot || slot >= endslot) {
 1390                                 continue;
 1391                         }
 1392 
 1393                         /*
 1394                          * process the page.
 1395                          */
 1396 
 1397                         rv = uao_pagein_page(aobj, i);
 1398                         if (rv) {
 1399                                 return rv;
 1400                         }
 1401                 }
 1402         }
 1403 
 1404         return false;
 1405 }
 1406 
 1407 /*
 1408  * page in a page from an aobj.  used for swap_off.
 1409  * returns true if pagein was aborted due to lack of memory.
 1410  *
 1411  * => aobj must be locked and is returned locked.
 1412  */
 1413 
 1414 static bool
 1415 uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
 1416 {
 1417         struct vm_page *pg;
 1418         int rv, npages;
 1419 
 1420         pg = NULL;
 1421         npages = 1;
 1422         /* locked: aobj */
 1423         rv = uao_get(&aobj->u_obj, pageidx << PAGE_SHIFT,
 1424             &pg, &npages, 0, VM_PROT_READ|VM_PROT_WRITE, 0, PGO_SYNCIO);
 1425         /* unlocked: aobj */
 1426 
 1427         /*
 1428          * relock and finish up.
 1429          */
 1430 
 1431         mutex_enter(&aobj->u_obj.vmobjlock);
 1432         switch (rv) {
 1433         case 0:
 1434                 break;
 1435 
 1436         case EIO:
 1437         case ERESTART:
 1438 
 1439                 /*
 1440                  * nothing more to do on errors.
 1441                  * ERESTART can only mean that the anon was freed,
 1442                  * so again there's nothing to do.
 1443                  */
 1444 
 1445                 return false;
 1446 
 1447         default:
 1448                 return true;
 1449         }
 1450 
 1451         /*
 1452          * ok, we've got the page now.
 1453          * mark it as dirty, clear its swslot and un-busy it.
 1454          */
 1455         uao_dropswap(&aobj->u_obj, pageidx);
 1456 
 1457         /*
 1458          * make sure it's on a page queue.
 1459          */
 1460         mutex_enter(&uvm_pageqlock);
 1461         if (pg->wire_count == 0)
 1462                 uvm_pageenqueue(pg);
 1463         mutex_exit(&uvm_pageqlock);
 1464 
 1465         if (pg->flags & PG_WANTED) {
 1466                 wakeup(pg);
 1467         }
 1468         pg->flags &= ~(PG_WANTED|PG_BUSY|PG_CLEAN|PG_FAKE);
 1469         UVM_PAGE_OWN(pg, NULL);
 1470 
 1471         return false;
 1472 }
 1473 
 1474 /*
 1475  * uao_dropswap_range: drop swapslots in the range.
 1476  *
 1477  * => aobj must be locked and is returned locked.
 1478  * => start is inclusive.  end is exclusive.
 1479  */
 1480 
 1481 void
 1482 uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
 1483 {
 1484         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
 1485 
 1486         KASSERT(mutex_owned(&uobj->vmobjlock));
 1487 
 1488         uao_dropswap_range1(aobj, start, end);
 1489 }
 1490 
 1491 static void
 1492 uao_dropswap_range1(struct uvm_aobj *aobj, voff_t start, voff_t end)
 1493 {
 1494         int swpgonlydelta = 0;
 1495 
 1496         if (end == 0) {
 1497                 end = INT64_MAX;
 1498         }
 1499 
 1500         if (UAO_USES_SWHASH(aobj)) {
 1501                 int i, hashbuckets = aobj->u_swhashmask + 1;
 1502                 voff_t taghi;
 1503                 voff_t taglo;
 1504 
 1505                 taglo = UAO_SWHASH_ELT_TAG(start);
 1506                 taghi = UAO_SWHASH_ELT_TAG(end);
 1507 
 1508                 for (i = 0; i < hashbuckets; i++) {
 1509                         struct uao_swhash_elt *elt, *next;
 1510 
 1511                         for (elt = LIST_FIRST(&aobj->u_swhash[i]);
 1512                              elt != NULL;
 1513                              elt = next) {
 1514                                 int startidx, endidx;
 1515                                 int j;
 1516 
 1517                                 next = LIST_NEXT(elt, list);
 1518 
 1519                                 if (elt->tag < taglo || taghi < elt->tag) {
 1520                                         continue;
 1521                                 }
 1522 
 1523                                 if (elt->tag == taglo) {
 1524                                         startidx =
 1525                                             UAO_SWHASH_ELT_PAGESLOT_IDX(start);
 1526                                 } else {
 1527                                         startidx = 0;
 1528                                 }
 1529 
 1530                                 if (elt->tag == taghi) {
 1531                                         endidx =
 1532                                             UAO_SWHASH_ELT_PAGESLOT_IDX(end);
 1533                                 } else {
 1534                                         endidx = UAO_SWHASH_CLUSTER_SIZE;
 1535                                 }
 1536 
 1537                                 for (j = startidx; j < endidx; j++) {
 1538                                         int slot = elt->slots[j];
 1539 
 1540                                         KASSERT(uvm_pagelookup(&aobj->u_obj,
 1541                                             (UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
 1542                                             + j) << PAGE_SHIFT) == NULL);
 1543                                         if (slot > 0) {
 1544                                                 uvm_swap_free(slot, 1);
 1545                                                 swpgonlydelta++;
 1546                                                 KASSERT(elt->count > 0);
 1547                                                 elt->slots[j] = 0;
 1548                                                 elt->count--;
 1549                                         }
 1550                                 }
 1551 
 1552                                 if (elt->count == 0) {
 1553                                         LIST_REMOVE(elt, list);
 1554                                         pool_put(&uao_swhash_elt_pool, elt);
 1555                                 }
 1556                         }
 1557                 }
 1558         } else {
 1559                 int i;
 1560 
 1561                 if (aobj->u_pages < end) {
 1562                         end = aobj->u_pages;
 1563                 }
 1564                 for (i = start; i < end; i++) {
 1565                         int slot = aobj->u_swslots[i];
 1566 
 1567                         if (slot > 0) {
 1568                                 uvm_swap_free(slot, 1);
 1569                                 swpgonlydelta++;
 1570                         }
 1571                 }
 1572         }
 1573 
 1574         /*
 1575          * adjust the counter of pages only in swap for all
 1576          * the swap slots we've freed.
 1577          */
 1578 
 1579         if (swpgonlydelta > 0) {
 1580                 mutex_enter(&uvm_swap_data_lock);
 1581                 KASSERT(uvmexp.swpgonly >= swpgonlydelta);
 1582                 uvmexp.swpgonly -= swpgonlydelta;
 1583                 mutex_exit(&uvm_swap_data_lock);
 1584         }
 1585 }
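
      /*
       * worked example (illustrative): dropping page indexes [0x35, 0x49)
       * with a cluster size of 16 gives taglo 0x3 and taghi 0x4, so the
       * elt tagged 0x3 is scanned at slot indexes 5..15 and the elt
       * tagged 0x4 at indexes 0..8 (end is exclusive).
       */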
 1586 
 1587 #endif /* defined(VMSWAP) */
