
FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_aobj.c

    1 /*      $NetBSD: uvm_aobj.c,v 1.114 2011/04/23 18:14:12 rmind Exp $     */
    2 
    3 /*
    4  * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
    5  *                    Washington University.
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   27  *
   28  * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
   29  */
   30 
   31 /*
   32  * uvm_aobj.c: anonymous memory uvm_object pager
   33  *
   34  * author: Chuck Silvers <chuq@chuq.com>
   35  * started: Jan-1998
   36  *
   37  * - design mostly from Chuck Cranor
   38  */
   39 
   40 #include <sys/cdefs.h>
   41 __KERNEL_RCSID(0, "$NetBSD: uvm_aobj.c,v 1.114 2011/04/23 18:14:12 rmind Exp $");
   42 
   43 #include "opt_uvmhist.h"
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/proc.h>
   48 #include <sys/kernel.h>
   49 #include <sys/kmem.h>
   50 #include <sys/pool.h>
   51 
   52 #include <uvm/uvm.h>
   53 
   54 /*
   55  * an aobj manages anonymous-memory backed uvm_objects.   in addition
   56  * to keeping the list of resident pages, it also keeps a list of
   57  * allocated swap blocks.  depending on the size of the aobj this list
   58  * of allocated swap blocks is either stored in an array (small objects)
   59  * or in a hash table (large objects).
   60  */
   61 
   62 /*
   63  * local structures
   64  */
   65 
   66 /*
   67  * for hash tables, we break the address space of the aobj into blocks
   68  * of UAO_SWHASH_CLUSTER_SIZE pages.   we require the cluster size to
   69  * be a power of two.
   70  */
   71 
   72 #define UAO_SWHASH_CLUSTER_SHIFT 4
   73 #define UAO_SWHASH_CLUSTER_SIZE (1 << UAO_SWHASH_CLUSTER_SHIFT)
   74 
   75 /* get the "tag" for this page index */
   76 #define UAO_SWHASH_ELT_TAG(PAGEIDX) \
   77         ((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT)
   78 
   79 #define UAO_SWHASH_ELT_PAGESLOT_IDX(PAGEIDX) \
   80         ((PAGEIDX) & (UAO_SWHASH_CLUSTER_SIZE - 1))
   81 
   82 /* given an ELT and a page index, find the swap slot */
   83 #define UAO_SWHASH_ELT_PAGESLOT(ELT, PAGEIDX) \
   84         ((ELT)->slots[UAO_SWHASH_ELT_PAGESLOT_IDX(PAGEIDX)])
   85 
   86 /* given an ELT, return its pageidx base */
   87 #define UAO_SWHASH_ELT_PAGEIDX_BASE(ELT) \
   88         ((ELT)->tag << UAO_SWHASH_CLUSTER_SHIFT)
   89 
   90 /*
   91  * the swhash hash function
   92  */
   93 
   94 #define UAO_SWHASH_HASH(AOBJ, PAGEIDX) \
   95         (&(AOBJ)->u_swhash[(((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT) \
   96                             & (AOBJ)->u_swhashmask)])
   97 
   98 /*
    99  * the swhash threshold determines if we will use an array or a
  100  * hash table to store the list of allocated swap blocks.
  101  */
  102 
  103 #define UAO_SWHASH_THRESHOLD (UAO_SWHASH_CLUSTER_SIZE * 4)
  104 #define UAO_USES_SWHASH(AOBJ) \
  105         ((AOBJ)->u_pages > UAO_SWHASH_THRESHOLD)        /* use hash? */
  106 
  107 /*
  108  * the number of buckets in a swhash, with an upper bound
  109  */
  110 
  111 #define UAO_SWHASH_MAXBUCKETS 256
  112 #define UAO_SWHASH_BUCKETS(AOBJ) \
  113         (MIN((AOBJ)->u_pages >> UAO_SWHASH_CLUSTER_SHIFT, \
  114              UAO_SWHASH_MAXBUCKETS))
  115 
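/*
 * Illustrative sketch (a user-level program, not part of this file): with
 * the cluster shift of 4 above, a page index splits into a cluster "tag"
 * (the upper bits) and a slot index within the cluster (the low 4 bits),
 * exactly as UAO_SWHASH_ELT_TAG and UAO_SWHASH_ELT_PAGESLOT_IDX compute.
 * The names below are hypothetical stand-ins for those macros.
 */
#include <stdio.h>

#define CLUSTER_SHIFT 4                         /* mirrors UAO_SWHASH_CLUSTER_SHIFT */
#define CLUSTER_SIZE  (1 << CLUSTER_SHIFT)

int
main(void)
{
        int pageidx = 37;
        int tag = pageidx >> CLUSTER_SHIFT;             /* 37 / 16 = 2 */
        int slotidx = pageidx & (CLUSTER_SIZE - 1);     /* 37 % 16 = 5 */

        printf("pageidx %d -> tag %d, slot index %d\n", pageidx, tag, slotidx);
        return 0;
}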
  116 /*
  117  * uao_swhash_elt: when a hash table is being used, this structure defines
  118  * the format of an entry in the bucket list.
  119  */
  120 
  121 struct uao_swhash_elt {
  122         LIST_ENTRY(uao_swhash_elt) list;        /* the hash list */
  123         voff_t tag;                             /* our 'tag' */
  124         int count;                              /* our number of active slots */
  125         int slots[UAO_SWHASH_CLUSTER_SIZE];     /* the slots */
  126 };
  127 
  128 /*
  129  * uao_swhash: the swap hash table structure
  130  */
  131 
  132 LIST_HEAD(uao_swhash, uao_swhash_elt);
  133 
  134 /*
  135  * uao_swhash_elt_pool: pool of uao_swhash_elt structures.
  136  * Note: pages for this pool must not come from a pageable kernel map.
  137  */
  138 static struct pool uao_swhash_elt_pool;
  139 
  140 /*
  141  * uvm_aobj: the actual anon-backed uvm_object
  142  *
   143  * => the uvm_object is at the top of the structure; this allows
  144  *   (struct uvm_aobj *) == (struct uvm_object *)
  145  * => only one of u_swslots and u_swhash is used in any given aobj
  146  */
  147 
  148 struct uvm_aobj {
  149         struct uvm_object u_obj; /* has: lock, pgops, memq, #pages, #refs */
  150         pgoff_t u_pages;         /* number of pages in entire object */
  151         int u_flags;             /* the flags (see uvm_aobj.h) */
  152         int *u_swslots;          /* array of offset->swapslot mappings */
  153                                  /*
  154                                   * hashtable of offset->swapslot mappings
  155                                   * (u_swhash is an array of bucket heads)
  156                                   */
  157         struct uao_swhash *u_swhash;
  158         u_long u_swhashmask;            /* mask for hashtable */
  159         LIST_ENTRY(uvm_aobj) u_list;    /* global list of aobjs */
  160 };
  161 
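/*
 * A minimal sketch of the cast noted above: because u_obj is the first
 * member of struct uvm_aobj, a pointer to the aobj and a pointer to its
 * embedded uvm_object share the same address, so pager code can hand out
 * &aobj->u_obj and later cast the uvm_object pointer back.  The stand-in
 * types below are hypothetical and only illustrate the idiom.
 */
#include <assert.h>

struct base { int refs; };
struct container { struct base b; int extra; };  /* base must come first */

int
main(void)
{
        struct container c = { { 1 }, 42 };
        struct base *bp = &c.b;                  /* same address as &c */

        /* casting back recovers the containing structure */
        assert((struct container *)bp == &c);
        assert(((struct container *)bp)->extra == 42);
        return 0;
}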
  162 /*
  163  * local functions
  164  */
  165 
  166 static void     uao_free(struct uvm_aobj *);
  167 static int      uao_get(struct uvm_object *, voff_t, struct vm_page **,
  168                     int *, int, vm_prot_t, int, int);
  169 static int      uao_put(struct uvm_object *, voff_t, voff_t, int);
  170 
  171 static void uao_detach_locked(struct uvm_object *);
  172 static void uao_reference_locked(struct uvm_object *);
  173 
  174 #if defined(VMSWAP)
  175 static struct uao_swhash_elt *uao_find_swhash_elt
  176     (struct uvm_aobj *, int, bool);
  177 
  178 static bool uao_pagein(struct uvm_aobj *, int, int);
  179 static bool uao_pagein_page(struct uvm_aobj *, int);
  180 static void uao_dropswap_range1(struct uvm_aobj *, voff_t, voff_t);
  181 #endif /* defined(VMSWAP) */
  182 
  183 /*
  184  * aobj_pager
  185  *
  186  * note that some functions (e.g. put) are handled elsewhere
  187  */
  188 
  189 const struct uvm_pagerops aobj_pager = {
  190         .pgo_reference = uao_reference,
  191         .pgo_detach = uao_detach,
  192         .pgo_get = uao_get,
  193         .pgo_put = uao_put,
  194 };
  195 
  196 /*
  197  * uao_list: global list of active aobjs, locked by uao_list_lock
  198  */
  199 
  200 static LIST_HEAD(aobjlist, uvm_aobj) uao_list;
  201 static kmutex_t uao_list_lock;
  202 
  203 /*
  204  * functions
  205  */
  206 
  207 /*
  208  * hash table/array related functions
  209  */
  210 
  211 #if defined(VMSWAP)
  212 
  213 /*
  214  * uao_find_swhash_elt: find (or create) a hash table entry for a page
  215  * offset.
  216  *
  217  * => the object should be locked by the caller
  218  */
  219 
  220 static struct uao_swhash_elt *
  221 uao_find_swhash_elt(struct uvm_aobj *aobj, int pageidx, bool create)
  222 {
  223         struct uao_swhash *swhash;
  224         struct uao_swhash_elt *elt;
  225         voff_t page_tag;
  226 
  227         swhash = UAO_SWHASH_HASH(aobj, pageidx);
  228         page_tag = UAO_SWHASH_ELT_TAG(pageidx);
  229 
  230         /*
  231          * now search the bucket for the requested tag
  232          */
  233 
  234         LIST_FOREACH(elt, swhash, list) {
  235                 if (elt->tag == page_tag) {
  236                         return elt;
  237                 }
  238         }
  239         if (!create) {
  240                 return NULL;
  241         }
  242 
  243         /*
  244          * allocate a new entry for the bucket and init/insert it in
  245          */
  246 
  247         elt = pool_get(&uao_swhash_elt_pool, PR_NOWAIT);
  248         if (elt == NULL) {
  249                 return NULL;
  250         }
  251         LIST_INSERT_HEAD(swhash, elt, list);
  252         elt->tag = page_tag;
  253         elt->count = 0;
  254         memset(elt->slots, 0, sizeof(elt->slots));
  255         return elt;
  256 }
  257 
  258 /*
  259  * uao_find_swslot: find the swap slot number for an aobj/pageidx
  260  *
  261  * => object must be locked by caller
  262  */
  263 
  264 int
  265 uao_find_swslot(struct uvm_object *uobj, int pageidx)
  266 {
  267         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  268         struct uao_swhash_elt *elt;
  269 
  270         /*
  271          * if noswap flag is set, then we never return a slot
  272          */
  273 
  274         if (aobj->u_flags & UAO_FLAG_NOSWAP)
  275                 return(0);
  276 
  277         /*
  278          * if hashing, look in hash table.
  279          */
  280 
  281         if (UAO_USES_SWHASH(aobj)) {
  282                 elt = uao_find_swhash_elt(aobj, pageidx, false);
  283                 if (elt)
  284                         return(UAO_SWHASH_ELT_PAGESLOT(elt, pageidx));
  285                 else
  286                         return(0);
  287         }
  288 
  289         /*
  290          * otherwise, look in the array
  291          */
  292 
  293         return(aobj->u_swslots[pageidx]);
  294 }
  295 
  296 /*
  297  * uao_set_swslot: set the swap slot for a page in an aobj.
  298  *
  299  * => setting a slot to zero frees the slot
  300  * => object must be locked by caller
  301  * => we return the old slot number, or -1 if we failed to allocate
  302  *    memory to record the new slot number
  303  */
  304 
  305 int
  306 uao_set_swslot(struct uvm_object *uobj, int pageidx, int slot)
  307 {
  308         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  309         struct uao_swhash_elt *elt;
  310         int oldslot;
  311         UVMHIST_FUNC("uao_set_swslot"); UVMHIST_CALLED(pdhist);
  312         UVMHIST_LOG(pdhist, "aobj %p pageidx %d slot %d",
  313             aobj, pageidx, slot, 0);
  314 
  315         KASSERT(mutex_owned(&uobj->vmobjlock) || uobj->uo_refs == 0);
  316 
  317         /*
  318          * if noswap flag is set, then we can't set a non-zero slot.
  319          */
  320 
  321         if (aobj->u_flags & UAO_FLAG_NOSWAP) {
  322                 if (slot == 0)
  323                         return(0);
  324 
  325                 printf("uao_set_swslot: uobj = %p\n", uobj);
  326                 panic("uao_set_swslot: NOSWAP object");
  327         }
  328 
  329         /*
  330          * are we using a hash table?  if so, add it in the hash.
  331          */
  332 
  333         if (UAO_USES_SWHASH(aobj)) {
  334 
  335                 /*
  336                  * Avoid allocating an entry just to free it again if
   337                  * the page had no swap slot in the first place, and
  338                  * we are freeing.
  339                  */
  340 
  341                 elt = uao_find_swhash_elt(aobj, pageidx, slot != 0);
  342                 if (elt == NULL) {
  343                         return slot ? -1 : 0;
  344                 }
  345 
  346                 oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
  347                 UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;
  348 
  349                 /*
  350                  * now adjust the elt's reference counter and free it if we've
  351                  * dropped it to zero.
  352                  */
  353 
  354                 if (slot) {
  355                         if (oldslot == 0)
  356                                 elt->count++;
  357                 } else {
  358                         if (oldslot)
  359                                 elt->count--;
  360 
  361                         if (elt->count == 0) {
  362                                 LIST_REMOVE(elt, list);
  363                                 pool_put(&uao_swhash_elt_pool, elt);
  364                         }
  365                 }
  366         } else {
  367                 /* we are using an array */
  368                 oldslot = aobj->u_swslots[pageidx];
  369                 aobj->u_swslots[pageidx] = slot;
  370         }
  371         return (oldslot);
  372 }
  373 
  374 #endif /* defined(VMSWAP) */
  375 
  376 /*
  377  * end of hash/array functions
  378  */
  379 
  380 /*
  381  * uao_free: free all resources held by an aobj, and then free the aobj
  382  *
  383  * => the aobj should be dead
  384  */
  385 
  386 static void
  387 uao_free(struct uvm_aobj *aobj)
  388 {
  389 
  390 #if defined(VMSWAP)
  391         uao_dropswap_range1(aobj, 0, 0);
  392 #endif /* defined(VMSWAP) */
  393 
  394         mutex_exit(&aobj->u_obj.vmobjlock);
  395 
  396 #if defined(VMSWAP)
  397         if (UAO_USES_SWHASH(aobj)) {
  398 
  399                 /*
  400                  * free the hash table itself.
  401                  */
  402 
  403                 hashdone(aobj->u_swhash, HASH_LIST, aobj->u_swhashmask);
  404         } else {
  405 
  406                 /*
   407                  * free the array itself.
  408                  */
  409 
  410                 kmem_free(aobj->u_swslots, aobj->u_pages * sizeof(int));
  411         }
  412 #endif /* defined(VMSWAP) */
  413 
  414         /*
  415          * finally free the aobj itself
  416          */
  417 
  418         UVM_OBJ_DESTROY(&aobj->u_obj);
  419         kmem_free(aobj, sizeof(struct uvm_aobj));
  420 }
  421 
  422 /*
  423  * pager functions
  424  */
  425 
  426 /*
  427  * uao_create: create an aobj of the given size and return its uvm_object.
  428  *
  429  * => for normal use, flags are always zero
  430  * => for the kernel object, the flags are:
  431  *      UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
  432  *      UAO_FLAG_KERNSWAP - enable swapping of kernel object ("           ")
  433  */
  434 
  435 struct uvm_object *
  436 uao_create(vsize_t size, int flags)
  437 {
  438         static struct uvm_aobj kernel_object_store;
  439         static int kobj_alloced = 0;
  440         pgoff_t pages = round_page(size) >> PAGE_SHIFT;
  441         struct uvm_aobj *aobj;
  442         int refs;
  443 
  444         /*
  445          * Allocate a new aobj, unless kernel object is requested.
  446          */
  447 
  448         if (flags & UAO_FLAG_KERNOBJ) {
  449                 KASSERT(!kobj_alloced);
  450                 aobj = &kernel_object_store;
  451                 aobj->u_pages = pages;
  452                 aobj->u_flags = UAO_FLAG_NOSWAP;
  453                 refs = UVM_OBJ_KERN;
  454                 kobj_alloced = UAO_FLAG_KERNOBJ;
  455         } else if (flags & UAO_FLAG_KERNSWAP) {
  456                 KASSERT(kobj_alloced == UAO_FLAG_KERNOBJ);
  457                 aobj = &kernel_object_store;
  458                 kobj_alloced = UAO_FLAG_KERNSWAP;
  459                 refs = 0xdeadbeaf; /* XXX: gcc */
  460         } else {
  461                 aobj = kmem_alloc(sizeof(struct uvm_aobj), KM_SLEEP);
  462                 aobj->u_pages = pages;
  463                 aobj->u_flags = 0;
  464                 refs = 1;
  465         }
  466 
  467         /*
  468          * allocate hash/array if necessary
  469          *
   470          * note: in the KERNSWAP case there is no need to worry about locking
   471          * since we are still booting and should be the only thread around.
  472          */
  473 
  474         if (flags == 0 || (flags & UAO_FLAG_KERNSWAP) != 0) {
  475 #if defined(VMSWAP)
  476                 const int kernswap = (flags & UAO_FLAG_KERNSWAP) != 0;
  477 
  478                 /* allocate hash table or array depending on object size */
  479                 if (UAO_USES_SWHASH(aobj)) {
  480                         aobj->u_swhash = hashinit(UAO_SWHASH_BUCKETS(aobj),
  481                             HASH_LIST, kernswap ? false : true,
  482                             &aobj->u_swhashmask);
  483                         if (aobj->u_swhash == NULL)
  484                                 panic("uao_create: hashinit swhash failed");
  485                 } else {
  486                         aobj->u_swslots = kmem_zalloc(pages * sizeof(int),
  487                             kernswap ? KM_NOSLEEP : KM_SLEEP);
  488                         if (aobj->u_swslots == NULL)
  489                                 panic("uao_create: swslots allocation failed");
  490                 }
  491 #endif /* defined(VMSWAP) */
  492 
  493                 if (flags) {
  494                         aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
  495                         return(&aobj->u_obj);
  496                 }
  497         }
  498 
  499         /*
  500          * init aobj fields
  501          */
  502 
  503         UVM_OBJ_INIT(&aobj->u_obj, &aobj_pager, refs);
  504 
  505         /*
  506          * now that aobj is ready, add it to the global list
  507          */
  508 
  509         mutex_enter(&uao_list_lock);
  510         LIST_INSERT_HEAD(&uao_list, aobj, u_list);
  511         mutex_exit(&uao_list_lock);
  512         return(&aobj->u_obj);
  513 }
  514 
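/*
 * A minimal usage sketch (kernel context, hedged): a normal caller passes
 * flags of zero and gets back a uvm_object holding one reference; extra
 * references are taken with uao_reference() and dropped with uao_detach(),
 * and the final uao_detach() frees the pages, any swap slots and the aobj
 * itself.  The function name below is hypothetical.
 */
#include <uvm/uvm.h>

static void
aobj_lifecycle_sketch(void)
{
        struct uvm_object *uobj;

        uobj = uao_create(16 * PAGE_SIZE, 0);   /* created with one reference */
        uao_reference(uobj);                    /* now two references */
        uao_detach(uobj);                       /* back to one */
        uao_detach(uobj);                       /* last reference: object freed */
}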
  515 
  516 
  517 /*
  518  * uao_init: set up aobj pager subsystem
  519  *
  520  * => called at boot time from uvm_pager_init()
  521  */
  522 
  523 void
  524 uao_init(void)
  525 {
  526         static int uao_initialized;
  527 
  528         if (uao_initialized)
  529                 return;
  530         uao_initialized = true;
  531         LIST_INIT(&uao_list);
  532         mutex_init(&uao_list_lock, MUTEX_DEFAULT, IPL_NONE);
  533         pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt),
  534             0, 0, 0, "uaoeltpl", NULL, IPL_VM);
  535 }
  536 
  537 /*
  538  * uao_reference: add a ref to an aobj
  539  *
  540  * => aobj must be unlocked
  541  * => just lock it and call the locked version
  542  */
  543 
  544 void
  545 uao_reference(struct uvm_object *uobj)
  546 {
  547 
  548         /*
  549          * kernel_object already has plenty of references, leave it alone.
  550          */
  551 
  552         if (UVM_OBJ_IS_KERN_OBJECT(uobj))
  553                 return;
  554 
  555         mutex_enter(&uobj->vmobjlock);
  556         uao_reference_locked(uobj);
  557         mutex_exit(&uobj->vmobjlock);
  558 }
  559 
  560 /*
  561  * uao_reference_locked: add a ref to an aobj that is already locked
  562  *
  563  * => aobj must be locked
  564  * this needs to be separate from the normal routine
  565  * since sometimes we need to add a reference to an aobj when
  566  * it's already locked.
  567  */
  568 
  569 static void
  570 uao_reference_locked(struct uvm_object *uobj)
  571 {
  572         UVMHIST_FUNC("uao_reference"); UVMHIST_CALLED(maphist);
  573 
  574         /*
  575          * kernel_object already has plenty of references, leave it alone.
  576          */
  577 
  578         if (UVM_OBJ_IS_KERN_OBJECT(uobj))
  579                 return;
  580 
  581         uobj->uo_refs++;
  582         UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)",
  583                     uobj, uobj->uo_refs,0,0);
  584 }
  585 
  586 /*
  587  * uao_detach: drop a reference to an aobj
  588  *
  589  * => aobj must be unlocked
  590  * => just lock it and call the locked version
  591  */
  592 
  593 void
  594 uao_detach(struct uvm_object *uobj)
  595 {
  596 
  597         /*
  598          * detaching from kernel_object is a noop.
  599          */
  600 
  601         if (UVM_OBJ_IS_KERN_OBJECT(uobj))
  602                 return;
  603 
  604         mutex_enter(&uobj->vmobjlock);
  605         uao_detach_locked(uobj);
  606 }
  607 
  608 /*
  609  * uao_detach_locked: drop a reference to an aobj
  610  *
  611  * => aobj must be locked, and is unlocked (or freed) upon return.
  612  * this needs to be separate from the normal routine
  613  * since sometimes we need to detach from an aobj when
  614  * it's already locked.
  615  */
  616 
  617 static void
  618 uao_detach_locked(struct uvm_object *uobj)
  619 {
  620         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  621         struct vm_page *pg;
  622         UVMHIST_FUNC("uao_detach"); UVMHIST_CALLED(maphist);
  623 
  624         /*
  625          * detaching from kernel_object is a noop.
  626          */
  627 
  628         if (UVM_OBJ_IS_KERN_OBJECT(uobj)) {
  629                 mutex_exit(&uobj->vmobjlock);
  630                 return;
  631         }
  632 
  633         UVMHIST_LOG(maphist,"  (uobj=0x%x)  ref=%d", uobj,uobj->uo_refs,0,0);
  634         uobj->uo_refs--;
  635         if (uobj->uo_refs) {
  636                 mutex_exit(&uobj->vmobjlock);
  637                 UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
  638                 return;
  639         }
  640 
  641         /*
  642          * remove the aobj from the global list.
  643          */
  644 
  645         mutex_enter(&uao_list_lock);
  646         LIST_REMOVE(aobj, u_list);
  647         mutex_exit(&uao_list_lock);
  648 
  649         /*
  650          * free all the pages left in the aobj.  for each page,
  651          * when the page is no longer busy (and thus after any disk i/o that
  652          * it's involved in is complete), release any swap resources and
  653          * free the page itself.
  654          */
  655 
  656         mutex_enter(&uvm_pageqlock);
  657         while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL) {
  658                 pmap_page_protect(pg, VM_PROT_NONE);
  659                 if (pg->flags & PG_BUSY) {
  660                         pg->flags |= PG_WANTED;
  661                         mutex_exit(&uvm_pageqlock);
  662                         UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, false,
  663                             "uao_det", 0);
  664                         mutex_enter(&uobj->vmobjlock);
  665                         mutex_enter(&uvm_pageqlock);
  666                         continue;
  667                 }
  668                 uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT);
  669                 uvm_pagefree(pg);
  670         }
  671         mutex_exit(&uvm_pageqlock);
  672 
  673         /*
  674          * finally, free the aobj itself.
  675          */
  676 
  677         uao_free(aobj);
  678 }
  679 
  680 /*
  681  * uao_put: flush pages out of a uvm object
  682  *
  683  * => object should be locked by caller.  we may _unlock_ the object
  684  *      if (and only if) we need to clean a page (PGO_CLEANIT).
  685  *      XXXJRT Currently, however, we don't.  In the case of cleaning
  686  *      XXXJRT a page, we simply just deactivate it.  Should probably
  687  *      XXXJRT handle this better, in the future (although "flushing"
  688  *      XXXJRT anonymous memory isn't terribly important).
  689  * => if PGO_CLEANIT is not set, then we will neither unlock the object
   690  *      nor block.
   691  * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
  692  *      for flushing.
  693  * => NOTE: we rely on the fact that the object's memq is a TAILQ and
  694  *      that new pages are inserted on the tail end of the list.  thus,
  695  *      we can make a complete pass through the object in one go by starting
  696  *      at the head and working towards the tail (new pages are put in
  697  *      front of us).
  698  * => NOTE: we are allowed to lock the page queues, so the caller
  699  *      must not be holding the lock on them [e.g. pagedaemon had
  700  *      better not call us with the queues locked]
  701  * => we return 0 unless we encountered some sort of I/O error
  702  *      XXXJRT currently never happens, as we never directly initiate
  703  *      XXXJRT I/O
  704  *
  705  * note on page traversal:
  706  *      we can traverse the pages in an object either by going down the
  707  *      linked list in "uobj->memq", or we can go over the address range
  708  *      by page doing hash table lookups for each address.  depending
  709  *      on how many pages are in the object it may be cheaper to do one
  710  *      or the other.  we set "by_list" to true if we are using memq.
  711  *      if the cost of a hash lookup was equal to the cost of the list
  712  *      traversal we could compare the number of pages in the start->stop
  713  *      range to the total number of pages in the object.  however, it
  714  *      seems that a hash table lookup is more expensive than the linked
  715  *      list traversal, so we multiply the number of pages in the
  716  *      start->stop range by a penalty which we define below.
  717  */
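/*
 * A small user-level sketch of the by_list decision described above,
 * assuming a hypothetical penalty value of 4 (UVM_PAGE_TREE_PENALTY is the
 * real constant): flushing a 32-page range of an object with 100 resident
 * pages walks the memq list (100 <= 32 * 4), while the same range of an
 * object with 1000 resident pages does per-offset lookups instead.
 */
#include <stdbool.h>
#include <stdio.h>

int
main(void)
{
        const int penalty = 4;          /* stand-in for UVM_PAGE_TREE_PENALTY */
        int range_pages = 32;           /* pages in the start->stop range */
        int resident = 100;             /* pages currently resident in the object */
        bool by_list = resident <= range_pages * penalty;

        printf("by_list = %s\n", by_list ? "true" : "false");   /* prints "true" */
        return 0;
}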
  718 
  719 static int
  720 uao_put(struct uvm_object *uobj, voff_t start, voff_t stop, int flags)
  721 {
  722         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  723         struct vm_page *pg, *nextpg, curmp, endmp;
  724         bool by_list;
  725         voff_t curoff;
  726         UVMHIST_FUNC("uao_put"); UVMHIST_CALLED(maphist);
  727 
  728         KASSERT(mutex_owned(&uobj->vmobjlock));
  729 
  730         curoff = 0;
  731         if (flags & PGO_ALLPAGES) {
  732                 start = 0;
  733                 stop = aobj->u_pages << PAGE_SHIFT;
  734                 by_list = true;         /* always go by the list */
  735         } else {
  736                 start = trunc_page(start);
  737                 if (stop == 0) {
  738                         stop = aobj->u_pages << PAGE_SHIFT;
  739                 } else {
  740                         stop = round_page(stop);
  741                 }
  742                 if (stop > (aobj->u_pages << PAGE_SHIFT)) {
  743                         printf("uao_flush: strange, got an out of range "
  744                             "flush (fixed)\n");
  745                         stop = aobj->u_pages << PAGE_SHIFT;
  746                 }
  747                 by_list = (uobj->uo_npages <=
  748                     ((stop - start) >> PAGE_SHIFT) * UVM_PAGE_TREE_PENALTY);
  749         }
  750         UVMHIST_LOG(maphist,
  751             " flush start=0x%lx, stop=0x%x, by_list=%d, flags=0x%x",
  752             start, stop, by_list, flags);
  753 
  754         /*
  755          * Don't need to do any work here if we're not freeing
  756          * or deactivating pages.
  757          */
  758 
  759         if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
  760                 mutex_exit(&uobj->vmobjlock);
  761                 return 0;
  762         }
  763 
  764         /*
  765          * Initialize the marker pages.  See the comment in
  766          * genfs_putpages() also.
  767          */
  768 
  769         curmp.flags = PG_MARKER;
  770         endmp.flags = PG_MARKER;
  771 
  772         /*
   773          * now do it.  note: we must update nextpg in the loop body or we
  774          * will get stuck.  we need to use nextpg if we'll traverse the list
  775          * because we may free "pg" before doing the next loop.
  776          */
  777 
  778         if (by_list) {
  779                 TAILQ_INSERT_TAIL(&uobj->memq, &endmp, listq.queue);
  780                 nextpg = TAILQ_FIRST(&uobj->memq);
  781         } else {
  782                 curoff = start;
  783                 nextpg = NULL;  /* Quell compiler warning */
  784         }
  785 
  786         /* locked: uobj */
  787         for (;;) {
  788                 if (by_list) {
  789                         pg = nextpg;
  790                         if (pg == &endmp)
  791                                 break;
  792                         nextpg = TAILQ_NEXT(pg, listq.queue);
  793                         if (pg->flags & PG_MARKER)
  794                                 continue;
  795                         if (pg->offset < start || pg->offset >= stop)
  796                                 continue;
  797                 } else {
  798                         if (curoff < stop) {
  799                                 pg = uvm_pagelookup(uobj, curoff);
  800                                 curoff += PAGE_SIZE;
  801                         } else
  802                                 break;
  803                         if (pg == NULL)
  804                                 continue;
  805                 }
  806 
  807                 /*
  808                  * wait and try again if the page is busy.
  809                  */
  810 
  811                 if (pg->flags & PG_BUSY) {
  812                         if (by_list) {
  813                                 TAILQ_INSERT_BEFORE(pg, &curmp, listq.queue);
  814                         }
  815                         pg->flags |= PG_WANTED;
  816                         UVM_UNLOCK_AND_WAIT(pg, &uobj->vmobjlock, 0,
  817                             "uao_put", 0);
  818                         mutex_enter(&uobj->vmobjlock);
  819                         if (by_list) {
  820                                 nextpg = TAILQ_NEXT(&curmp, listq.queue);
  821                                 TAILQ_REMOVE(&uobj->memq, &curmp,
  822                                     listq.queue);
  823                         } else
  824                                 curoff -= PAGE_SIZE;
  825                         continue;
  826                 }
  827 
  828                 switch (flags & (PGO_CLEANIT|PGO_FREE|PGO_DEACTIVATE)) {
  829 
  830                 /*
  831                  * XXX In these first 3 cases, we always just
  832                  * XXX deactivate the page.  We may want to
  833                  * XXX handle the different cases more specifically
  834                  * XXX in the future.
  835                  */
  836 
  837                 case PGO_CLEANIT|PGO_FREE:
  838                 case PGO_CLEANIT|PGO_DEACTIVATE:
  839                 case PGO_DEACTIVATE:
  840  deactivate_it:
  841                         mutex_enter(&uvm_pageqlock);
  842                         /* skip the page if it's wired */
  843                         if (pg->wire_count == 0) {
  844                                 uvm_pagedeactivate(pg);
  845                         }
  846                         mutex_exit(&uvm_pageqlock);
  847                         break;
  848 
  849                 case PGO_FREE:
  850                         /*
  851                          * If there are multiple references to
  852                          * the object, just deactivate the page.
  853                          */
  854 
  855                         if (uobj->uo_refs > 1)
  856                                 goto deactivate_it;
  857 
  858                         /*
  859                          * free the swap slot and the page.
  860                          */
  861 
  862                         pmap_page_protect(pg, VM_PROT_NONE);
  863 
  864                         /*
  865                          * freeing swapslot here is not strictly necessary.
  866                          * however, leaving it here doesn't save much
  867                          * because we need to update swap accounting anyway.
  868                          */
  869 
  870                         uao_dropswap(uobj, pg->offset >> PAGE_SHIFT);
  871                         mutex_enter(&uvm_pageqlock);
  872                         uvm_pagefree(pg);
  873                         mutex_exit(&uvm_pageqlock);
  874                         break;
  875 
  876                 default:
  877                         panic("%s: impossible", __func__);
  878                 }
  879         }
  880         if (by_list) {
  881                 TAILQ_REMOVE(&uobj->memq, &endmp, listq.queue);
  882         }
  883         mutex_exit(&uobj->vmobjlock);
  884         return 0;
  885 }
  886 
  887 /*
  888  * uao_get: fetch me a page
  889  *
  890  * we have three cases:
  891  * 1: page is resident     -> just return the page.
  892  * 2: page is zero-fill    -> allocate a new page and zero it.
  893  * 3: page is swapped out  -> fetch the page from swap.
  894  *
  895  * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot.
  896  * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
  897  * then we will need to return EBUSY.
  898  *
  899  * => prefer map unlocked (not required)
  900  * => object must be locked!  we will _unlock_ it before starting any I/O.
  901  * => flags: PGO_ALLPAGES: get all of the pages
  902  *           PGO_LOCKED: fault data structures are locked
  903  * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
  904  * => NOTE: caller must check for released pages!!
  905  */
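/*
 * A hedged sketch of a caller's side of the PGO_LOCKED fast path described
 * above (loosely modelled on what a fault handler does; the surrounding map
 * locking and the busy-page handling a real caller must perform are omitted,
 * and the function name is hypothetical).  Under PGO_LOCKED the object stays
 * locked across the call and no I/O is started; EBUSY tells the caller to
 * retry with a blocking call such as the one in uao_pagein_page() below.
 */
#include <uvm/uvm.h>

static struct vm_page *
try_locked_lookup(struct uvm_object *uobj, voff_t off)
{
        struct vm_page *pps[1] = { NULL };
        int npages = 1;
        int error;

        mutex_enter(&uobj->vmobjlock);
        error = (*uobj->pgops->pgo_get)(uobj, off, pps, &npages, 0,
            VM_PROT_READ, UVM_ADV_NORMAL, PGO_LOCKED);
        mutex_exit(&uobj->vmobjlock);

        /* on success, pps[0] is resident and returned PG_BUSY (owned by us) */
        return (error == 0) ? pps[0] : NULL;
}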
  906 
  907 static int
  908 uao_get(struct uvm_object *uobj, voff_t offset, struct vm_page **pps,
  909     int *npagesp, int centeridx, vm_prot_t access_type, int advice, int flags)
  910 {
  911 #if defined(VMSWAP)
  912         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
  913 #endif /* defined(VMSWAP) */
  914         voff_t current_offset;
  915         struct vm_page *ptmp = NULL;    /* Quell compiler warning */
  916         int lcv, gotpages, maxpages, swslot, pageidx;
  917         bool done;
  918         UVMHIST_FUNC("uao_get"); UVMHIST_CALLED(pdhist);
  919 
  920         UVMHIST_LOG(pdhist, "aobj=%p offset=%d, flags=%d",
  921                     (struct uvm_aobj *)uobj, offset, flags,0);
  922 
  923         /*
  924          * get number of pages
  925          */
  926 
  927         maxpages = *npagesp;
  928 
  929         /*
   930          * step 1: handle the case where the fault data structures are locked.
  931          */
  932 
  933         if (flags & PGO_LOCKED) {
  934 
  935                 /*
  936                  * step 1a: get pages that are already resident.   only do
  937                  * this if the data structures are locked (i.e. the first
  938                  * time through).
  939                  */
  940 
  941                 done = true;    /* be optimistic */
  942                 gotpages = 0;   /* # of pages we got so far */
  943                 for (lcv = 0, current_offset = offset ; lcv < maxpages ;
  944                     lcv++, current_offset += PAGE_SIZE) {
  945                         /* do we care about this page?  if not, skip it */
  946                         if (pps[lcv] == PGO_DONTCARE)
  947                                 continue;
  948                         ptmp = uvm_pagelookup(uobj, current_offset);
  949 
  950                         /*
  951                          * if page is new, attempt to allocate the page,
  952                          * zero-fill'd.
  953                          */
  954 
  955                         if (ptmp == NULL && uao_find_swslot(&aobj->u_obj,
  956                             current_offset >> PAGE_SHIFT) == 0) {
  957                                 ptmp = uvm_pagealloc(uobj, current_offset,
  958                                     NULL, UVM_PGA_ZERO);
  959                                 if (ptmp) {
  960                                         /* new page */
  961                                         ptmp->flags &= ~(PG_FAKE);
  962                                         ptmp->pqflags |= PQ_AOBJ;
  963                                         goto gotpage;
  964                                 }
  965                         }
  966 
  967                         /*
  968                          * to be useful must get a non-busy page
  969                          */
  970 
  971                         if (ptmp == NULL || (ptmp->flags & PG_BUSY) != 0) {
  972                                 if (lcv == centeridx ||
  973                                     (flags & PGO_ALLPAGES) != 0)
  974                                         /* need to do a wait or I/O! */
  975                                         done = false;
   976                                 continue;
  977                         }
  978 
  979                         /*
  980                          * useful page: busy/lock it and plug it in our
  981                          * result array
  982                          */
  983 
  984                         /* caller must un-busy this page */
  985                         ptmp->flags |= PG_BUSY;
  986                         UVM_PAGE_OWN(ptmp, "uao_get1");
  987 gotpage:
  988                         pps[lcv] = ptmp;
  989                         gotpages++;
  990                 }
  991 
  992                 /*
   993                  * step 1b: now we've either done everything needed or we need
  994                  * to unlock and do some waiting or I/O.
  995                  */
  996 
  997                 UVMHIST_LOG(pdhist, "<- done (done=%d)", done, 0,0,0);
  998                 *npagesp = gotpages;
  999                 if (done)
 1000                         return 0;
 1001                 else
 1002                         return EBUSY;
 1003         }
 1004 
 1005         /*
 1006          * step 2: get non-resident or busy pages.
 1007          * object is locked.   data structures are unlocked.
 1008          */
 1009 
 1010         if ((flags & PGO_SYNCIO) == 0) {
 1011                 goto done;
 1012         }
 1013 
 1014         for (lcv = 0, current_offset = offset ; lcv < maxpages ;
 1015             lcv++, current_offset += PAGE_SIZE) {
 1016 
 1017                 /*
 1018                  * - skip over pages we've already gotten or don't want
 1019                  * - skip over pages we don't _have_ to get
 1020                  */
 1021 
 1022                 if (pps[lcv] != NULL ||
 1023                     (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
 1024                         continue;
 1025 
 1026                 pageidx = current_offset >> PAGE_SHIFT;
 1027 
 1028                 /*
 1029                  * we have yet to locate the current page (pps[lcv]).   we
 1030                  * first look for a page that is already at the current offset.
 1031                  * if we find a page, we check to see if it is busy or
 1032                  * released.  if that is the case, then we sleep on the page
 1033                  * until it is no longer busy or released and repeat the lookup.
 1034                  * if the page we found is neither busy nor released, then we
 1035                  * busy it (so we own it) and plug it into pps[lcv].   this
 1036                  * 'break's the following while loop and indicates we are
 1037                  * ready to move on to the next page in the "lcv" loop above.
 1038                  *
 1039                  * if we exit the while loop with pps[lcv] still set to NULL,
 1040                  * then it means that we allocated a new busy/fake/clean page
 1041                  * ptmp in the object and we need to do I/O to fill in the data.
 1042                  */
 1043 
 1044                 /* top of "pps" while loop */
 1045                 while (pps[lcv] == NULL) {
 1046                         /* look for a resident page */
 1047                         ptmp = uvm_pagelookup(uobj, current_offset);
 1048 
 1049                         /* not resident?   allocate one now (if we can) */
 1050                         if (ptmp == NULL) {
 1051 
 1052                                 ptmp = uvm_pagealloc(uobj, current_offset,
 1053                                     NULL, 0);
 1054 
 1055                                 /* out of RAM? */
 1056                                 if (ptmp == NULL) {
 1057                                         mutex_exit(&uobj->vmobjlock);
 1058                                         UVMHIST_LOG(pdhist,
 1059                                             "sleeping, ptmp == NULL\n",0,0,0,0);
 1060                                         uvm_wait("uao_getpage");
 1061                                         mutex_enter(&uobj->vmobjlock);
 1062                                         continue;
 1063                                 }
 1064 
 1065                                 /*
 1066                                  * safe with PQ's unlocked: because we just
 1067                                  * alloc'd the page
 1068                                  */
 1069 
 1070                                 ptmp->pqflags |= PQ_AOBJ;
 1071 
 1072                                 /*
 1073                                  * got new page ready for I/O.  break pps while
 1074                                  * loop.  pps[lcv] is still NULL.
 1075                                  */
 1076 
 1077                                 break;
 1078                         }
 1079 
 1080                         /* page is there, see if we need to wait on it */
 1081                         if ((ptmp->flags & PG_BUSY) != 0) {
 1082                                 ptmp->flags |= PG_WANTED;
 1083                                 UVMHIST_LOG(pdhist,
 1084                                     "sleeping, ptmp->flags 0x%x\n",
 1085                                     ptmp->flags,0,0,0);
 1086                                 UVM_UNLOCK_AND_WAIT(ptmp, &uobj->vmobjlock,
 1087                                     false, "uao_get", 0);
 1088                                 mutex_enter(&uobj->vmobjlock);
 1089                                 continue;
 1090                         }
 1091 
 1092                         /*
 1093                          * if we get here then the page has become resident and
 1094                          * unbusy between steps 1 and 2.  we busy it now (so we
 1095                          * own it) and set pps[lcv] (so that we exit the while
 1096                          * loop).
 1097                          */
 1098 
 1099                         /* we own it, caller must un-busy */
 1100                         ptmp->flags |= PG_BUSY;
 1101                         UVM_PAGE_OWN(ptmp, "uao_get2");
 1102                         pps[lcv] = ptmp;
 1103                 }
 1104 
 1105                 /*
 1106                  * if we own the valid page at the correct offset, pps[lcv] will
 1107                  * point to it.   nothing more to do except go to the next page.
 1108                  */
 1109 
 1110                 if (pps[lcv])
 1111                         continue;                       /* next lcv */
 1112 
 1113                 /*
 1114                  * we have a "fake/busy/clean" page that we just allocated.
 1115                  * do the needed "i/o", either reading from swap or zeroing.
 1116                  */
 1117 
 1118                 swslot = uao_find_swslot(&aobj->u_obj, pageidx);
 1119 
 1120                 /*
 1121                  * just zero the page if there's nothing in swap.
 1122                  */
 1123 
 1124                 if (swslot == 0) {
 1125 
 1126                         /*
 1127                          * page hasn't existed before, just zero it.
 1128                          */
 1129 
 1130                         uvm_pagezero(ptmp);
 1131                 } else {
 1132 #if defined(VMSWAP)
 1133                         int error;
 1134 
 1135                         UVMHIST_LOG(pdhist, "pagein from swslot %d",
 1136                              swslot, 0,0,0);
 1137 
 1138                         /*
 1139                          * page in the swapped-out page.
 1140                          * unlock object for i/o, relock when done.
 1141                          */
 1142 
 1143                         mutex_exit(&uobj->vmobjlock);
 1144                         error = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
 1145                         mutex_enter(&uobj->vmobjlock);
 1146 
 1147                         /*
 1148                          * I/O done.  check for errors.
 1149                          */
 1150 
 1151                         if (error != 0) {
 1152                                 UVMHIST_LOG(pdhist, "<- done (error=%d)",
 1153                                     error,0,0,0);
 1154                                 if (ptmp->flags & PG_WANTED)
 1155                                         wakeup(ptmp);
 1156 
 1157                                 /*
 1158                                  * remove the swap slot from the aobj
 1159                                  * and mark the aobj as having no real slot.
 1160                                  * don't free the swap slot, thus preventing
 1161                                  * it from being used again.
 1162                                  */
 1163 
 1164                                 swslot = uao_set_swslot(&aobj->u_obj, pageidx,
 1165                                                         SWSLOT_BAD);
 1166                                 if (swslot > 0) {
 1167                                         uvm_swap_markbad(swslot, 1);
 1168                                 }
 1169 
 1170                                 mutex_enter(&uvm_pageqlock);
 1171                                 uvm_pagefree(ptmp);
 1172                                 mutex_exit(&uvm_pageqlock);
 1173                                 mutex_exit(&uobj->vmobjlock);
 1174                                 return error;
 1175                         }
 1176 #else /* defined(VMSWAP) */
 1177                         panic("%s: pagein", __func__);
 1178 #endif /* defined(VMSWAP) */
 1179                 }
 1180 
 1181                 if ((access_type & VM_PROT_WRITE) == 0) {
 1182                         ptmp->flags |= PG_CLEAN;
 1183                         pmap_clear_modify(ptmp);
 1184                 }
 1185 
 1186                 /*
 1187                  * we got the page!   clear the fake flag (indicates valid
 1188                  * data now in page) and plug into our result array.   note
 1189                  * that page is still busy.
 1190                  *
  1191                  * it is the caller's job to:
 1192                  * => check if the page is released
 1193                  * => unbusy the page
 1194                  * => activate the page
 1195                  */
 1196 
 1197                 ptmp->flags &= ~PG_FAKE;
 1198                 pps[lcv] = ptmp;
 1199         }
 1200 
 1201         /*
 1202          * finally, unlock object and return.
 1203          */
 1204 
 1205 done:
 1206         mutex_exit(&uobj->vmobjlock);
 1207         UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0);
 1208         return 0;
 1209 }
 1210 
 1211 #if defined(VMSWAP)
 1212 
 1213 /*
 1214  * uao_dropswap:  release any swap resources from this aobj page.
 1215  *
 1216  * => aobj must be locked or have a reference count of 0.
 1217  */
 1218 
 1219 void
 1220 uao_dropswap(struct uvm_object *uobj, int pageidx)
 1221 {
 1222         int slot;
 1223 
 1224         slot = uao_set_swslot(uobj, pageidx, 0);
 1225         if (slot) {
 1226                 uvm_swap_free(slot, 1);
 1227         }
 1228 }
 1229 
 1230 /*
 1231  * page in every page in every aobj that is paged-out to a range of swslots.
 1232  *
 1233  * => nothing should be locked.
 1234  * => returns true if pagein was aborted due to lack of memory.
 1235  */
 1236 
 1237 bool
 1238 uao_swap_off(int startslot, int endslot)
 1239 {
 1240         struct uvm_aobj *aobj, *nextaobj;
 1241         bool rv;
 1242 
 1243         /*
 1244          * walk the list of all aobjs.
 1245          */
 1246 
 1247 restart:
 1248         mutex_enter(&uao_list_lock);
 1249         for (aobj = LIST_FIRST(&uao_list);
 1250              aobj != NULL;
 1251              aobj = nextaobj) {
 1252 
 1253                 /*
 1254                  * try to get the object lock, start all over if we fail.
 1255                  * most of the time we'll get the aobj lock,
 1256                  * so this should be a rare case.
 1257                  */
 1258 
 1259                 if (!mutex_tryenter(&aobj->u_obj.vmobjlock)) {
 1260                         mutex_exit(&uao_list_lock);
 1261                         /* XXX Better than yielding but inadequate. */
 1262                         kpause("livelock", false, 1, NULL);
 1263                         goto restart;
 1264                 }
 1265 
 1266                 /*
 1267                  * add a ref to the aobj so it doesn't disappear
 1268                  * while we're working.
 1269                  */
 1270 
 1271                 uao_reference_locked(&aobj->u_obj);
 1272 
 1273                 /*
 1274                  * now it's safe to unlock the uao list.
 1275                  */
 1276 
 1277                 mutex_exit(&uao_list_lock);
 1278 
 1279                 /*
 1280                  * page in any pages in the swslot range.
 1281                  * if there's an error, abort and return the error.
 1282                  */
 1283 
 1284                 rv = uao_pagein(aobj, startslot, endslot);
 1285                 if (rv) {
 1286                         uao_detach_locked(&aobj->u_obj);
 1287                         return rv;
 1288                 }
 1289 
 1290                 /*
 1291                  * we're done with this aobj.
 1292                  * relock the list and drop our ref on the aobj.
 1293                  */
 1294 
 1295                 mutex_enter(&uao_list_lock);
 1296                 nextaobj = LIST_NEXT(aobj, u_list);
 1297                 uao_detach_locked(&aobj->u_obj);
 1298         }
 1299 
 1300         /*
 1301          * done with traversal, unlock the list
 1302          */
 1303         mutex_exit(&uao_list_lock);
 1304         return false;
 1305 }
 1306 
 1307 
 1308 /*
 1309  * page in any pages from aobj in the given range.
 1310  *
 1311  * => aobj must be locked and is returned locked.
 1312  * => returns true if pagein was aborted due to lack of memory.
 1313  */
 1314 static bool
 1315 uao_pagein(struct uvm_aobj *aobj, int startslot, int endslot)
 1316 {
 1317         bool rv;
 1318 
 1319         if (UAO_USES_SWHASH(aobj)) {
 1320                 struct uao_swhash_elt *elt;
 1321                 int buck;
 1322 
 1323 restart:
 1324                 for (buck = aobj->u_swhashmask; buck >= 0; buck--) {
 1325                         for (elt = LIST_FIRST(&aobj->u_swhash[buck]);
 1326                              elt != NULL;
 1327                              elt = LIST_NEXT(elt, list)) {
 1328                                 int i;
 1329 
 1330                                 for (i = 0; i < UAO_SWHASH_CLUSTER_SIZE; i++) {
 1331                                         int slot = elt->slots[i];
 1332 
 1333                                         /*
 1334                                          * if the slot isn't in range, skip it.
 1335                                          */
 1336 
 1337                                         if (slot < startslot ||
 1338                                             slot >= endslot) {
 1339                                                 continue;
 1340                                         }
 1341 
 1342                                         /*
 1343                                          * process the page,
  1344                                          * then start over on this object
 1345                                          * since the swhash elt
 1346                                          * may have been freed.
 1347                                          */
 1348 
 1349                                         rv = uao_pagein_page(aobj,
 1350                                           UAO_SWHASH_ELT_PAGEIDX_BASE(elt) + i);
 1351                                         if (rv) {
 1352                                                 return rv;
 1353                                         }
 1354                                         goto restart;
 1355                                 }
 1356                         }
 1357                 }
 1358         } else {
 1359                 int i;
 1360 
 1361                 for (i = 0; i < aobj->u_pages; i++) {
 1362                         int slot = aobj->u_swslots[i];
 1363 
 1364                         /*
 1365                          * if the slot isn't in range, skip it
 1366                          */
 1367 
 1368                         if (slot < startslot || slot >= endslot) {
 1369                                 continue;
 1370                         }
 1371 
 1372                         /*
 1373                          * process the page.
 1374                          */
 1375 
 1376                         rv = uao_pagein_page(aobj, i);
 1377                         if (rv) {
 1378                                 return rv;
 1379                         }
 1380                 }
 1381         }
 1382 
 1383         return false;
 1384 }
 1385 
 1386 /*
 1387  * page in a page from an aobj.  used for swap_off.
 1388  * returns true if pagein was aborted due to lack of memory.
 1389  *
 1390  * => aobj must be locked and is returned locked.
 1391  */
 1392 
 1393 static bool
 1394 uao_pagein_page(struct uvm_aobj *aobj, int pageidx)
 1395 {
 1396         struct vm_page *pg;
 1397         int rv, npages;
 1398 
 1399         pg = NULL;
 1400         npages = 1;
 1401         /* locked: aobj */
 1402         rv = uao_get(&aobj->u_obj, pageidx << PAGE_SHIFT,
 1403             &pg, &npages, 0, VM_PROT_READ|VM_PROT_WRITE, 0, PGO_SYNCIO);
 1404         /* unlocked: aobj */
 1405 
 1406         /*
 1407          * relock and finish up.
 1408          */
 1409 
 1410         mutex_enter(&aobj->u_obj.vmobjlock);
 1411         switch (rv) {
 1412         case 0:
 1413                 break;
 1414 
 1415         case EIO:
 1416         case ERESTART:
 1417 
 1418                 /*
 1419                  * nothing more to do on errors.
 1420                  * ERESTART can only mean that the anon was freed,
 1421                  * so again there's nothing to do.
 1422                  */
 1423 
 1424                 return false;
 1425 
 1426         default:
 1427                 return true;
 1428         }
 1429 
 1430         /*
 1431          * ok, we've got the page now.
 1432          * mark it as dirty, clear its swslot and un-busy it.
 1433          */
 1434         uao_dropswap(&aobj->u_obj, pageidx);
 1435 
 1436         /*
 1437          * make sure it's on a page queue.
 1438          */
 1439         mutex_enter(&uvm_pageqlock);
 1440         if (pg->wire_count == 0)
 1441                 uvm_pageenqueue(pg);
 1442         mutex_exit(&uvm_pageqlock);
 1443 
 1444         if (pg->flags & PG_WANTED) {
 1445                 wakeup(pg);
 1446         }
 1447         pg->flags &= ~(PG_WANTED|PG_BUSY|PG_CLEAN|PG_FAKE);
 1448         UVM_PAGE_OWN(pg, NULL);
 1449 
 1450         return false;
 1451 }
 1452 
 1453 /*
 1454  * uao_dropswap_range: drop swapslots in the range.
 1455  *
 1456  * => aobj must be locked and is returned locked.
 1457  * => start is inclusive.  end is exclusive.
 1458  */
 1459 
 1460 void
 1461 uao_dropswap_range(struct uvm_object *uobj, voff_t start, voff_t end)
 1462 {
 1463         struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
 1464 
 1465         KASSERT(mutex_owned(&uobj->vmobjlock));
 1466 
 1467         uao_dropswap_range1(aobj, start, end);
 1468 }
 1469 
 1470 static void
 1471 uao_dropswap_range1(struct uvm_aobj *aobj, voff_t start, voff_t end)
 1472 {
 1473         int swpgonlydelta = 0;
 1474 
 1475         if (end == 0) {
 1476                 end = INT64_MAX;
 1477         }
 1478 
 1479         if (UAO_USES_SWHASH(aobj)) {
 1480                 int i, hashbuckets = aobj->u_swhashmask + 1;
 1481                 voff_t taghi;
 1482                 voff_t taglo;
 1483 
 1484                 taglo = UAO_SWHASH_ELT_TAG(start);
 1485                 taghi = UAO_SWHASH_ELT_TAG(end);
 1486 
 1487                 for (i = 0; i < hashbuckets; i++) {
 1488                         struct uao_swhash_elt *elt, *next;
 1489 
 1490                         for (elt = LIST_FIRST(&aobj->u_swhash[i]);
 1491                              elt != NULL;
 1492                              elt = next) {
 1493                                 int startidx, endidx;
 1494                                 int j;
 1495 
 1496                                 next = LIST_NEXT(elt, list);
 1497 
 1498                                 if (elt->tag < taglo || taghi < elt->tag) {
 1499                                         continue;
 1500                                 }
 1501 
 1502                                 if (elt->tag == taglo) {
 1503                                         startidx =
 1504                                             UAO_SWHASH_ELT_PAGESLOT_IDX(start);
 1505                                 } else {
 1506                                         startidx = 0;
 1507                                 }
 1508 
 1509                                 if (elt->tag == taghi) {
 1510                                         endidx =
 1511                                             UAO_SWHASH_ELT_PAGESLOT_IDX(end);
 1512                                 } else {
 1513                                         endidx = UAO_SWHASH_CLUSTER_SIZE;
 1514                                 }
 1515 
 1516                                 for (j = startidx; j < endidx; j++) {
 1517                                         int slot = elt->slots[j];
 1518 
 1519                                         KASSERT(uvm_pagelookup(&aobj->u_obj,
 1520                                             (UAO_SWHASH_ELT_PAGEIDX_BASE(elt)
 1521                                             + j) << PAGE_SHIFT) == NULL);
 1522                                         if (slot > 0) {
 1523                                                 uvm_swap_free(slot, 1);
 1524                                                 swpgonlydelta++;
 1525                                                 KASSERT(elt->count > 0);
 1526                                                 elt->slots[j] = 0;
 1527                                                 elt->count--;
 1528                                         }
 1529                                 }
 1530 
 1531                                 if (elt->count == 0) {
 1532                                         LIST_REMOVE(elt, list);
 1533                                         pool_put(&uao_swhash_elt_pool, elt);
 1534                                 }
 1535                         }
 1536                 }
 1537         } else {
 1538                 int i;
 1539 
 1540                 if (aobj->u_pages < end) {
 1541                         end = aobj->u_pages;
 1542                 }
 1543                 for (i = start; i < end; i++) {
 1544                         int slot = aobj->u_swslots[i];
 1545 
 1546                         if (slot > 0) {
 1547                                 uvm_swap_free(slot, 1);
 1548                                 swpgonlydelta++;
 1549                         }
 1550                 }
 1551         }
 1552 
 1553         /*
 1554          * adjust the counter of pages only in swap for all
 1555          * the swap slots we've freed.
 1556          */
 1557 
 1558         if (swpgonlydelta > 0) {
 1559                 mutex_enter(&uvm_swap_data_lock);
 1560                 KASSERT(uvmexp.swpgonly >= swpgonlydelta);
 1561                 uvmexp.swpgonly -= swpgonlydelta;
 1562                 mutex_exit(&uvm_swap_data_lock);
 1563         }
 1564 }
 1565 
 1566 #endif /* defined(VMSWAP) */

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.