FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_pager.c


    1 /*      $OpenBSD: uvm_pager.c,v 1.89 2022/08/19 05:53:19 mpi Exp $      */
    2 /*      $NetBSD: uvm_pager.c,v 1.36 2000/11/27 18:26:41 chs Exp $       */
    3 
    4 /*
    5  * Copyright (c) 1997 Charles D. Cranor and Washington University.
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
   22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
   26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   27  *
   28  * from: Id: uvm_pager.c,v 1.1.2.23 1998/02/02 20:38:06 chuck Exp
   29  */
   30 
   31 /*
   32  * uvm_pager.c: generic functions used to assist the pagers.
   33  */
   34 
   35 #include <sys/param.h>
   36 #include <sys/systm.h>
   37 #include <sys/malloc.h>
   38 #include <sys/pool.h>
   39 #include <sys/buf.h>
   40 #include <sys/atomic.h>
   41 
   42 #include <uvm/uvm.h>
   43 
   44 const struct uvm_pagerops *uvmpagerops[] = {
   45         &aobj_pager,
   46         &uvm_deviceops,
   47         &uvm_vnodeops,
   48 };
   49 
   50 /*
   51  * the pager map: provides KVA for I/O
   52  *
   53  * Each uvm_pseg has room for MAX_PAGER_SEGS pager I/O segments of
   54  * MAXBSIZE bytes each.
   55  *
   56  * The number of initialized uvm_pseg instances is dynamic; they are
   57  * held in the psegs array, which has room for PSEG_NUMSEGS of them.
   58  *
   59  * psegs[0/1] always exist (so that the pager can always map in pages).
   60  * psegs[0/1] elements 0 and 1 are always reserved for the pagedaemon.
   61  *
   62  * Any other pseg is automatically created when no space is available
   63  * and automatically destroyed when it is no longer in use.
   64  */
   65 #define MAX_PAGER_SEGS  16
   66 #define PSEG_NUMSEGS    (PAGER_MAP_SIZE / MAX_PAGER_SEGS / MAXBSIZE)
   67 struct uvm_pseg {
   68         /* Start of virtual space; 0 if not inited. */
   69         vaddr_t start;
   70         /* Bitmap of the segments in use in this pseg. */
   71         int     use;
   72 };
   73 struct  mutex uvm_pseg_lck;
   74 struct  uvm_pseg psegs[PSEG_NUMSEGS];
   75 
   76 #define UVM_PSEG_FULL(pseg)     ((pseg)->use == (1 << MAX_PAGER_SEGS) - 1)
   77 #define UVM_PSEG_EMPTY(pseg)    ((pseg)->use == 0)
   78 #define UVM_PSEG_INUSE(pseg,id) (((pseg)->use & (1 << (id))) != 0)
   79 
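/*
 * Illustrative sketch, not part of the original file: the geometry of the
 * pager map, assuming (machine-dependent values) PAGER_MAP_SIZE is 16 MB
 * and MAXBSIZE is 64 KB:
 *
 *	KVA per pseg      = MAX_PAGER_SEGS * MAXBSIZE = 16 * 64 KB = 1 MB
 *	number of psegs   = PSEG_NUMSEGS
 *	                  = PAGER_MAP_SIZE / MAX_PAGER_SEGS / MAXBSIZE = 16
 *
 * Each pseg tracks its MAX_PAGER_SEGS slots of MAXBSIZE bytes in the "use"
 * bitmap, so UVM_PSEG_FULL is "all 16 low bits set" and UVM_PSEG_INUSE
 * tests a single bit.
 */
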
   80 void            uvm_pseg_init(struct uvm_pseg *);
   81 vaddr_t         uvm_pseg_get(int);
   82 void            uvm_pseg_release(vaddr_t);
   83 
   84 /*
   85  * uvm_pager_init: init pagers (at boot time)
   86  */
   87 void
   88 uvm_pager_init(void)
   89 {
   90         int lcv;
   91 
   92         /* init pager map */
   93         uvm_pseg_init(&psegs[0]);
   94         uvm_pseg_init(&psegs[1]);
   95         mtx_init(&uvm_pseg_lck, IPL_VM);
   96 
   97         /* init ASYNC I/O queue */
   98         TAILQ_INIT(&uvm.aio_done);
   99 
  100         /* call pager init functions */
  101         for (lcv = 0 ; lcv < sizeof(uvmpagerops)/sizeof(struct uvm_pagerops *);
  102             lcv++) {
  103                 if (uvmpagerops[lcv]->pgo_init)
  104                         uvmpagerops[lcv]->pgo_init();
  105         }
  106 }
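
#if 0
/*
 * Illustrative sketch, not part of the original file: the shape of a pager
 * as this file uses it.  Only the hooks that uvm_pager.c itself calls are
 * shown; the "example_*" functions are hypothetical and the full structure
 * is declared in <uvm/uvm_pager.h>.
 */
const struct uvm_pagerops example_pager = {
	.pgo_init        = example_init,	/* called once from uvm_pager_init() */
	.pgo_cluster     = example_cluster,	/* queried by uvm_mk_pcluster() */
	.pgo_mk_pcluster = uvm_mk_pcluster,	/* or NULL, or a pager-specific routine */
	.pgo_put         = example_put,		/* called from uvm_pager_put() */
};
#endif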
  107 
  108 /*
  109  * Initialize a uvm_pseg.
  110  *
   111  * May fail, in which case pseg->start == 0.
  112  *
  113  * Caller locks uvm_pseg_lck.
  114  */
  115 void
  116 uvm_pseg_init(struct uvm_pseg *pseg)
  117 {
  118         KASSERT(pseg->start == 0);
  119         KASSERT(pseg->use == 0);
  120         pseg->start = (vaddr_t)km_alloc(MAX_PAGER_SEGS * MAXBSIZE,
  121             &kv_any, &kp_none, &kd_trylock);
  122 }
  123 
  124 /*
  125  * Acquire a pager map segment.
  126  *
  127  * Returns a vaddr for paging. 0 on failure.
  128  *
  129  * Caller does not lock.
  130  */
  131 vaddr_t
  132 uvm_pseg_get(int flags)
  133 {
  134         int i;
  135         struct uvm_pseg *pseg;
  136 
  137         /*
  138          * XXX Prevent lock ordering issue in uvm_unmap_detach().  A real
  139          * fix would be to move the KERNEL_LOCK() out of uvm_unmap_detach().
  140          *
  141          *  witness_checkorder() at witness_checkorder+0xba0
  142          *  __mp_lock() at __mp_lock+0x5f
  143          *  uvm_unmap_detach() at uvm_unmap_detach+0xc5
  144          *  uvm_map() at uvm_map+0x857
  145          *  uvm_km_valloc_try() at uvm_km_valloc_try+0x65
  146          *  uvm_pseg_get() at uvm_pseg_get+0x6f
  147          *  uvm_pagermapin() at uvm_pagermapin+0x45
  148          *  uvn_io() at uvn_io+0xcf
  149          *  uvn_get() at uvn_get+0x156
  150          *  uvm_fault_lower() at uvm_fault_lower+0x28a
  151          *  uvm_fault() at uvm_fault+0x1b3
  152          *  upageflttrap() at upageflttrap+0x62
  153          */
  154         KERNEL_LOCK();
  155         mtx_enter(&uvm_pseg_lck);
  156 
  157 pager_seg_restart:
  158         /* Find first pseg that has room. */
  159         for (pseg = &psegs[0]; pseg != &psegs[PSEG_NUMSEGS]; pseg++) {
  160                 if (UVM_PSEG_FULL(pseg))
  161                         continue;
  162 
  163                 if (pseg->start == 0) {
  164                         /* Need initialization. */
  165                         uvm_pseg_init(pseg);
  166                         if (pseg->start == 0)
  167                                 goto pager_seg_fail;
  168                 }
  169 
  170                 /* Keep indexes 0,1 reserved for pagedaemon. */
  171                 if ((pseg == &psegs[0] || pseg == &psegs[1]) &&
  172                     (curproc != uvm.pagedaemon_proc))
  173                         i = 2;
  174                 else
  175                         i = 0;
  176 
  177                 for (; i < MAX_PAGER_SEGS; i++) {
  178                         if (!UVM_PSEG_INUSE(pseg, i)) {
  179                                 pseg->use |= 1 << i;
  180                                 mtx_leave(&uvm_pseg_lck);
  181                                 KERNEL_UNLOCK();
  182                                 return pseg->start + i * MAXBSIZE;
  183                         }
  184                 }
  185         }
  186 
  187 pager_seg_fail:
  188         if ((flags & UVMPAGER_MAPIN_WAITOK) != 0) {
  189                 msleep_nsec(&psegs, &uvm_pseg_lck, PVM, "pagerseg", INFSLP);
  190                 goto pager_seg_restart;
  191         }
  192 
  193         mtx_leave(&uvm_pseg_lck);
  194         KERNEL_UNLOCK();
  195         return 0;
  196 }
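
/*
 * Not part of the original file: a small example of how the "use" bitmap
 * evolves.  A pseg that has handed out slots 0, 1 and 3 has use == 0x000b;
 * the scan above finds slot 2 free, returns pseg->start + 2 * MAXBSIZE and
 * leaves use == 0x000f.  Callers that pass UVMPAGER_MAPIN_WAITOK sleep on
 * &psegs until uvm_pseg_release() does a wakeup; all other callers get 0
 * back and must handle the failure themselves.
 */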
  197 
  198 /*
  199  * Release a pager map segment.
  200  *
  201  * Caller does not lock.
  202  *
  203  * Deallocates pseg if it is no longer in use.
  204  */
  205 void
  206 uvm_pseg_release(vaddr_t segaddr)
  207 {
  208         int id;
  209         struct uvm_pseg *pseg;
  210         vaddr_t va = 0;
  211 
  212         mtx_enter(&uvm_pseg_lck);
  213         for (pseg = &psegs[0]; pseg != &psegs[PSEG_NUMSEGS]; pseg++) {
  214                 if (pseg->start <= segaddr &&
  215                     segaddr < pseg->start + MAX_PAGER_SEGS * MAXBSIZE)
  216                         break;
  217         }
  218         KASSERT(pseg != &psegs[PSEG_NUMSEGS]);
  219 
  220         id = (segaddr - pseg->start) / MAXBSIZE;
  221         KASSERT(id >= 0 && id < MAX_PAGER_SEGS);
  222 
  223         /* test for no remainder */
  224         KDASSERT(segaddr == pseg->start + id * MAXBSIZE);
  225 
  226 
  227         KASSERT(UVM_PSEG_INUSE(pseg, id));
  228 
  229         pseg->use &= ~(1 << id);
  230         wakeup(&psegs);
  231 
  232         if ((pseg != &psegs[0] && pseg != &psegs[1]) && UVM_PSEG_EMPTY(pseg)) {
  233                 va = pseg->start;
  234                 pseg->start = 0;
  235         }
  236 
  237         mtx_leave(&uvm_pseg_lck);
  238 
  239         if (va) {
  240                 km_free((void *)va, MAX_PAGER_SEGS * MAXBSIZE,
  241                     &kv_any, &kp_none);
  242         }
  243 }
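
/*
 * Not part of the original file: the release path inverts the mapping
 * math.  For segaddr == pseg->start + 5 * MAXBSIZE the computed id is 5,
 * bit 5 is cleared from the "use" bitmap and waiters are woken.  A pseg
 * other than psegs[0] and psegs[1] that becomes completely idle also gives
 * its KVA back via km_free(), outside the mutex.
 */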
  244 
  245 /*
  246  * uvm_pagermapin: map pages into KVA for I/O that needs mappings
  247  *
   248  * We basically just reserve KVA out of the pager segment pool
   249  * (uvm_pseg_get) and then use pmap_enter() to put the mappings in by hand.
  250  */
  251 vaddr_t
  252 uvm_pagermapin(struct vm_page **pps, int npages, int flags)
  253 {
  254         vaddr_t kva, cva;
  255         vm_prot_t prot;
  256         vsize_t size;
  257         struct vm_page *pp;
  258 
  259 #if defined(__HAVE_PMAP_DIRECT)
  260         /*
  261          * Use direct mappings for single page, unless there is a risk
  262          * of aliasing.
  263          */
  264         if (npages == 1 && PMAP_PREFER_ALIGN() == 0) {
  265                 KASSERT(pps[0]);
  266                 KASSERT(pps[0]->pg_flags & PG_BUSY);
  267                 return pmap_map_direct(pps[0]);
  268         }
  269 #endif
  270 
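        /*
         * UVMPAGER_MAPIN_READ means the pager is about to read data from
         * backing store into these pages, so the temporary kernel mapping
         * must be writable; a pageout only needs PROT_READ.
         */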
  271         prot = PROT_READ;
  272         if (flags & UVMPAGER_MAPIN_READ)
  273                 prot |= PROT_WRITE;
  274         size = ptoa(npages);
  275 
  276         KASSERT(size <= MAXBSIZE);
  277 
  278         kva = uvm_pseg_get(flags);
  279         if (kva == 0)
  280                 return 0;
  281 
  282         for (cva = kva ; size != 0 ; size -= PAGE_SIZE, cva += PAGE_SIZE) {
  283                 pp = *pps++;
  284                 KASSERT(pp);
  285                 KASSERT(pp->pg_flags & PG_BUSY);
  286                 /* Allow pmap_enter to fail. */
  287                 if (pmap_enter(pmap_kernel(), cva, VM_PAGE_TO_PHYS(pp),
  288                     prot, PMAP_WIRED | PMAP_CANFAIL | prot) != 0) {
  289                         pmap_remove(pmap_kernel(), kva, cva);
  290                         pmap_update(pmap_kernel());
  291                         uvm_pseg_release(kva);
  292                         return 0;
  293                 }
  294         }
  295         pmap_update(pmap_kernel());
  296         return kva;
  297 }
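
#if 0
/*
 * Illustrative sketch, not part of the original file: how a pager's I/O
 * routine typically brackets a transfer with uvm_pagermapin() and
 * uvm_pagermapout() (compare uvn_io() in uvm_vnode.c).  The buf setup and
 * the transfer itself are elided; "pps" holds PG_BUSY pages collected by
 * the caller, and example_pager_io() is hypothetical.
 */
static int
example_pager_io(struct vm_page **pps, int npages, int isread)
{
	vaddr_t kva;

	kva = uvm_pagermapin(pps, npages,
	    isread ? UVMPAGER_MAPIN_READ : 0);
	if (kva == 0)
		return VM_PAGER_AGAIN;	/* UVMPAGER_MAPIN_WAITOK would have slept instead */

	/* ... point a struct buf's b_data at kva and perform the I/O ... */

	uvm_pagermapout(kva, npages);
	return VM_PAGER_OK;
}
#endif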
  298 
  299 /*
  300  * uvm_pagermapout: remove KVA mapping
  301  *
   302  * We remove our mappings by hand and then release the KVA segment.
  303  */
  304 void
  305 uvm_pagermapout(vaddr_t kva, int npages)
  306 {
  307 #if defined(__HAVE_PMAP_DIRECT)
  308         /*
  309          * Use direct mappings for single page, unless there is a risk
  310          * of aliasing.
  311          */
  312         if (npages == 1 && PMAP_PREFER_ALIGN() == 0) {
  313                 pmap_unmap_direct(kva);
  314                 return;
  315         }
  316 #endif
  317 
  318         pmap_remove(pmap_kernel(), kva, kva + ((vsize_t)npages << PAGE_SHIFT));
  319         pmap_update(pmap_kernel());
  320         uvm_pseg_release(kva);
  321 
  322 }
  323 
  324 /*
  325  * uvm_mk_pcluster
  326  *
  327  * generic "make 'pager put' cluster" function.  a pager can either
  328  * [1] set pgo_mk_pcluster to NULL (never cluster), [2] set it to this
  329  * generic function, or [3] set it to a pager specific function.
  330  *
  331  * => caller must lock object _and_ pagequeues (since we need to look
  332  *    at active vs. inactive bits, etc.)
  333  * => caller must make center page busy and write-protect it
  334  * => we mark all cluster pages busy for the caller
  335  * => the caller must unbusy all pages (and check wanted/released
  336  *    status if it drops the object lock)
  337  * => flags:
  338  *      PGO_ALLPAGES:  all pages in object are valid targets
  339  *      !PGO_ALLPAGES: use "lo" and "hi" to limit range of cluster
  340  *      PGO_DOACTCLUST: include active pages in cluster.
  341  *      PGO_FREE: set the PG_RELEASED bits on the cluster so they'll be freed
  342  *              in async io (caller must clean on error).
  343  *        NOTE: the caller should clear PG_CLEANCHK bits if PGO_DOACTCLUST.
  344  *              PG_CLEANCHK is only a hint, but clearing will help reduce
  345  *              the number of calls we make to the pmap layer.
  346  */
  347 
  348 struct vm_page **
  349 uvm_mk_pcluster(struct uvm_object *uobj, struct vm_page **pps, int *npages,
  350     struct vm_page *center, int flags, voff_t mlo, voff_t mhi)
  351 {
  352         struct vm_page **ppsp, *pclust;
  353         voff_t lo, hi, curoff;
  354         int center_idx, forward, incr;
  355 
  356         /* 
  357          * center page should already be busy and write protected.  XXX:
  358          * suppose page is wired?  if we lock, then a process could
  359          * fault/block on it.  if we don't lock, a process could write the
  360          * pages in the middle of an I/O.  (consider an msync()).  let's
  361          * lock it for now (better to delay than corrupt data?).
  362          */
  363         /* get cluster boundaries, check sanity, and apply our limits as well.*/
  364         uobj->pgops->pgo_cluster(uobj, center->offset, &lo, &hi);
  365         if ((flags & PGO_ALLPAGES) == 0) {
  366                 if (lo < mlo)
  367                         lo = mlo;
  368                 if (hi > mhi)
  369                         hi = mhi;
  370         }
  371         if ((hi - lo) >> PAGE_SHIFT > *npages) { /* pps too small, bail out! */
  372                 pps[0] = center;
  373                 *npages = 1;
  374                 return pps;
  375         }
  376 
  377         /* now determine the center and attempt to cluster around the edges */
  378         center_idx = (center->offset - lo) >> PAGE_SHIFT;
  379         pps[center_idx] = center;       /* plug in the center page */
  380         ppsp = &pps[center_idx];
  381         *npages = 1;
  382 
  383         /*
  384          * attempt to cluster around the left [backward], and then 
  385          * the right side [forward].    
  386          *
  387          * note that for inactive pages (pages that have been deactivated)
  388          * there are no valid mappings and PG_CLEAN should be up to date.
  389          * [i.e. there is no need to query the pmap with pmap_is_modified
  390          * since there are no mappings].
  391          */
  392         for (forward  = 0 ; forward <= 1 ; forward++) {
  393                 incr = forward ? PAGE_SIZE : -PAGE_SIZE;
  394                 curoff = center->offset + incr;
  395                 for ( ;(forward == 0 && curoff >= lo) ||
  396                        (forward && curoff < hi);
  397                       curoff += incr) {
  398 
  399                         pclust = uvm_pagelookup(uobj, curoff); /* lookup page */
  400                         if (pclust == NULL) {
  401                                 break;                  /* no page */
  402                         }
  403                         /* handle active pages */
  404                         /* NOTE: inactive pages don't have pmap mappings */
  405                         if ((pclust->pg_flags & PQ_INACTIVE) == 0) {
  406                                 if ((flags & PGO_DOACTCLUST) == 0) {
   407                                         /* don't want mapped pages at all */
  408                                         break;
  409                                 }
  410 
  411                                 /* make sure "clean" bit is sync'd */
  412                                 if ((pclust->pg_flags & PG_CLEANCHK) == 0) {
  413                                         if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY))
  414                                            == PG_CLEAN &&
  415                                            pmap_is_modified(pclust))
  416                                                 atomic_clearbits_int(
  417                                                     &pclust->pg_flags,
  418                                                     PG_CLEAN);
  419                                         /* now checked */
  420                                         atomic_setbits_int(&pclust->pg_flags,
  421                                             PG_CLEANCHK);
  422                                 }
  423                         }
  424 
  425                         /* is page available for cleaning and does it need it */
  426                         if ((pclust->pg_flags & (PG_CLEAN|PG_BUSY)) != 0) {
  427                                 break;  /* page is already clean or is busy */
  428                         }
  429 
  430                         /* yes!   enroll the page in our array */
  431                         atomic_setbits_int(&pclust->pg_flags, PG_BUSY);
  432                         UVM_PAGE_OWN(pclust, "uvm_mk_pcluster");
  433 
  434                         /*
  435                          * If we want to free after io is done, and we're
  436                          * async, set the released flag
  437                          */
  438                         if ((flags & (PGO_FREE|PGO_SYNCIO)) == PGO_FREE)
  439                                 atomic_setbits_int(&pclust->pg_flags,
  440                                     PG_RELEASED);
  441 
  442                         /* XXX: protect wired page?   see above comment. */
  443                         pmap_page_protect(pclust, PROT_READ);
  444                         if (!forward) {
  445                                 ppsp--;                 /* back up one page */
  446                                 *ppsp = pclust;
  447                         } else {
  448                                 /* move forward one page */
  449                                 ppsp[*npages] = pclust;
  450                         }
  451                         (*npages)++;
  452                 }
  453         }
  454         
  455         /*
  456          * done!  return the cluster array to the caller!!!
  457          */
  458         return ppsp;
  459 }
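
/*
 * Not part of the original file: a worked example of the index math above,
 * assuming a 4 KB PAGE_SIZE.  Suppose pgo_cluster() reports lo = 0x4000
 * and hi = 0x10000 around a center page at offset 0x8000 and *npages is 16
 * on entry.  The window spans 12 pages, which fits, so
 * center_idx = (0x8000 - 0x4000) >> PAGE_SHIFT = 4 and the center page is
 * stored at pps[4].  The backward scan then walks ppsp down through
 * offsets 0x7000, 0x6000, ... toward lo, and the forward scan appends
 * pages at 0x9000, 0xa000, ... up to (but not including) hi, each
 * direction stopping at the first page that is missing, busy, already
 * clean, or (without PGO_DOACTCLUST) still mapped/active.
 */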
  460 
  461 /*
  462  * uvm_pager_put: high level pageout routine
  463  *
  464  * we want to pageout page "pg" to backing store, clustering if
  465  * possible.
  466  *
  467  * => page queues must be locked by caller
  468  * => if page is not swap-backed, then "uobj" points to the object
  469  *      backing it.
  470  * => if page is swap-backed, then "uobj" should be NULL.
   471  * => "pg" should be PG_BUSY (by caller), and !PG_CLEAN.
   472  *    For swap-backed memory, "pg" can be NULL if there is no page
  473  *    of interest [sometimes the case for the pagedaemon]
  474  * => "ppsp_ptr" should point to an array of npages vm_page pointers
  475  *      for possible cluster building
  476  * => flags (first two for non-swap-backed pages)
  477  *      PGO_ALLPAGES: all pages in uobj are valid targets
  478  *      PGO_DOACTCLUST: include "PQ_ACTIVE" pages as valid targets
  479  *      PGO_SYNCIO: do SYNC I/O (no async)
  480  *      PGO_PDFREECLUST: pagedaemon: drop cluster on successful I/O
  481  *      PGO_FREE: tell the aio daemon to free pages in the async case.
  482  * => start/stop: if (uobj && !PGO_ALLPAGES) limit targets to this range
  483  *                if (!uobj) start is the (daddr_t) of the starting swapblk
  484  * => return state:
  485  *      1. we return the VM_PAGER status code of the pageout
  486  *      2. we return with the page queues unlocked
  487  *      3. on errors we always drop the cluster.   thus, if we return
  488  *              !PEND, !OK, then the caller only has to worry about
  489  *              un-busying the main page (not the cluster pages).
  490  *      4. on success, if !PGO_PDFREECLUST, we return the cluster
  491  *              with all pages busy (caller must un-busy and check
  492  *              wanted/released flags).
  493  */
  494 int
  495 uvm_pager_put(struct uvm_object *uobj, struct vm_page *pg,
  496     struct vm_page ***ppsp_ptr, int *npages, int flags,
  497     voff_t start, voff_t stop)
  498 {
  499         int result;
  500         daddr_t swblk;
  501         struct vm_page **ppsp = *ppsp_ptr;
  502 
  503         /*
  504          * note that uobj is null  if we are doing a swap-backed pageout.
  505          * note that uobj is !null if we are doing normal object pageout.
  506          * note that the page queues must be locked to cluster.
  507          */
  508         if (uobj) {     /* if !swap-backed */
  509                 /*
  510                  * attempt to build a cluster for pageout using its
  511                  * make-put-cluster function (if it has one).
  512                  */
  513                 if (uobj->pgops->pgo_mk_pcluster) {
  514                         ppsp = uobj->pgops->pgo_mk_pcluster(uobj, ppsp,
  515                             npages, pg, flags, start, stop);
  516                         *ppsp_ptr = ppsp;  /* update caller's pointer */
  517                 } else {
  518                         ppsp[0] = pg;
  519                         *npages = 1;
  520                 }
  521 
  522                 swblk = 0;              /* XXX: keep gcc happy */
  523         } else {
  524                 /*
  525                  * for swap-backed pageout, the caller (the pagedaemon) has
  526                  * already built the cluster for us.   the starting swap
  527                  * block we are writing to has been passed in as "start."
  528                  * "pg" could be NULL if there is no page we are especially
  529                  * interested in (in which case the whole cluster gets dropped
  530                  * in the event of an error or a sync "done").
  531                  */
  532                 swblk = start;
  533                 /* ppsp and npages should be ok */
  534         }
  535 
  536         /* now that we've clustered we can unlock the page queues */
  537         uvm_unlock_pageq();
  538 
  539         /*
  540          * now attempt the I/O.   if we have a failure and we are
  541          * clustered, we will drop the cluster and try again.
  542          */
  543 ReTry:
  544         if (uobj) {
  545                 result = uobj->pgops->pgo_put(uobj, ppsp, *npages, flags);
  546         } else {
  547                 /* XXX daddr_t -> int */
  548                 result = uvm_swap_put(swblk, ppsp, *npages, flags);
  549         }
  550 
  551         /*
  552          * we have attempted the I/O.
  553          *
  554          * if the I/O was a success then:
  555          *      if !PGO_PDFREECLUST, we return the cluster to the 
  556          *              caller (who must un-busy all pages)
  557          *      else we un-busy cluster pages for the pagedaemon
  558          *
  559          * if I/O is pending (async i/o) then we return the pending code.
  560          * [in this case the async i/o done function must clean up when
  561          *  i/o is done...]
  562          */
  563         if (result == VM_PAGER_PEND || result == VM_PAGER_OK) {
  564                 if (result == VM_PAGER_OK && (flags & PGO_PDFREECLUST)) {
  565                         /* drop cluster */
  566                         if (*npages > 1 || pg == NULL)
  567                                 uvm_pager_dropcluster(uobj, pg, ppsp, npages,
  568                                     PGO_PDFREECLUST);
  569                 }
  570                 return (result);
  571         }
  572 
  573         /*
  574          * a pager error occurred (even after dropping the cluster, if there
  575          * was one).  give up! the caller only has one page ("pg")
  576          * to worry about.
  577          */
  578         if (*npages > 1 || pg == NULL) {
  579                 uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_REALLOCSWAP);
  580 
  581                 /*
  582                  * for failed swap-backed pageouts with a "pg",
  583                  * we need to reset pg's swslot to either:
  584                  * "swblk" (for transient errors, so we can retry),
  585                  * or 0 (for hard errors).
  586                  */
  587                 if (uobj == NULL && pg != NULL) {
  588                         /* XXX daddr_t -> int */
  589                         int nswblk = (result == VM_PAGER_AGAIN) ? swblk : 0;
  590                         if (pg->pg_flags & PQ_ANON) {
  591                                 rw_enter(pg->uanon->an_lock, RW_WRITE);
  592                                 pg->uanon->an_swslot = nswblk;
  593                                 rw_exit(pg->uanon->an_lock);
  594                         } else {
  595                                 rw_enter(pg->uobject->vmobjlock, RW_WRITE);
  596                                 uao_set_swslot(pg->uobject,
  597                                                pg->offset >> PAGE_SHIFT,
  598                                                nswblk);
  599                                 rw_exit(pg->uobject->vmobjlock);
  600                         }
  601                 }
  602                 if (result == VM_PAGER_AGAIN) {
  603                         /*
  604                          * for transient failures, free all the swslots that
  605                          * we're not going to retry with.
  606                          */
  607                         if (uobj == NULL) {
  608                                 if (pg) {
  609                                         /* XXX daddr_t -> int */
  610                                         uvm_swap_free(swblk + 1, *npages - 1);
  611                                 } else {
  612                                         /* XXX daddr_t -> int */
  613                                         uvm_swap_free(swblk, *npages);
  614                                 }
  615                         }
  616                         if (pg) {
  617                                 ppsp[0] = pg;
  618                                 *npages = 1;
  619                                 goto ReTry;
  620                         }
  621                 } else if (uobj == NULL) {
  622                         /*
  623                          * for hard errors on swap-backed pageouts,
  624                          * mark the swslots as bad.  note that we do not
  625                          * free swslots that we mark bad.
  626                          */
  627                         /* XXX daddr_t -> int */
  628                         uvm_swap_markbad(swblk, *npages);
  629                 }
  630         }
  631 
   632         /*
   633          * The error persisted even after the cluster (if any) was dropped,
   634          * so just report it back; the caller only has the one page ("pg")
   635          * left to worry about.
   636          */
  637         
  638         return result;
  639 }
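
/*
 * Illustrative sketch, not part of the original file: roughly how the
 * pagedaemon drives uvm_pager_put() for an object-backed page (the real
 * caller lives in uvm_pdaemon.c).  With the page queues locked and "pg"
 * busy and write-protected:
 *
 *	struct vm_page *cluster[MAXBSIZE >> PAGE_SHIFT];
 *	struct vm_page **ppsp = cluster;
 *	int npages = MAXBSIZE >> PAGE_SHIFT, result;
 *
 *	result = uvm_pager_put(uobj, pg, &ppsp, &npages,
 *	    PGO_ALLPAGES | PGO_PDFREECLUST, 0, 0);
 *
 * On VM_PAGER_OK the cluster pages have already been un-busied because of
 * PGO_PDFREECLUST; on any error the cluster has been dropped, so only "pg"
 * itself is left for the caller to deal with.
 */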
  640 
  641 /*
  642  * uvm_pager_dropcluster: drop a cluster we have built (because we 
  643  * got an error, or, if PGO_PDFREECLUST we are un-busying the
  644  * cluster pages on behalf of the pagedaemon).
  645  *
  646  * => uobj, if non-null, is a non-swap-backed object
  647  * => page queues are not locked
  648  * => pg is our page of interest (the one we clustered around, can be null)
  649  * => ppsp/npages is our current cluster
  650  * => flags: PGO_PDFREECLUST: pageout was a success: un-busy cluster
  651  *      pages on behalf of the pagedaemon.
  652  *           PGO_REALLOCSWAP: drop previously allocated swap slots for 
  653  *              clustered swap-backed pages (except for "pg" if !NULL)
  654  *              "swblk" is the start of swap alloc (e.g. for ppsp[0])
  655  *              [only meaningful if swap-backed (uobj == NULL)]
  656  */
  657 
  658 void
  659 uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg,
  660     struct vm_page **ppsp, int *npages, int flags)
  661 {
  662         int lcv;
  663 
  664         KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock));
  665 
  666         /* drop all pages but "pg" */
  667         for (lcv = 0 ; lcv < *npages ; lcv++) {
  668                 /* skip "pg" or empty slot */
  669                 if (ppsp[lcv] == pg || ppsp[lcv] == NULL)
  670                         continue;
  671         
  672                 /*
  673                  * Note that PQ_ANON bit can't change as long as we are holding
  674                  * the PG_BUSY bit (so there is no need to lock the page
  675                  * queues to test it).
  676                  */
  677                 if (!uobj) {
  678                         if (ppsp[lcv]->pg_flags & PQ_ANON) {
  679                                 rw_enter(ppsp[lcv]->uanon->an_lock, RW_WRITE);
  680                                 if (flags & PGO_REALLOCSWAP)
  681                                           /* zap swap block */
  682                                           ppsp[lcv]->uanon->an_swslot = 0;
  683                         } else {
  684                                 rw_enter(ppsp[lcv]->uobject->vmobjlock,
  685                                     RW_WRITE);
  686                                 if (flags & PGO_REALLOCSWAP)
  687                                         uao_set_swslot(ppsp[lcv]->uobject,
  688                                             ppsp[lcv]->offset >> PAGE_SHIFT, 0);
  689                         }
  690                 }
  691 
  692                 /* did someone want the page while we had it busy-locked? */
  693                 if (ppsp[lcv]->pg_flags & PG_WANTED) {
  694                         wakeup(ppsp[lcv]);
  695                 }
  696 
  697                 /* if page was released, release it.  otherwise un-busy it */
  698                 if (ppsp[lcv]->pg_flags & PG_RELEASED &&
  699                     ppsp[lcv]->pg_flags & PQ_ANON) {
  700                                 /* kills anon and frees pg */
  701                                 uvm_anon_release(ppsp[lcv]->uanon);
  702                                 continue;
  703                 } else {
  704                         /*
  705                          * if we were planning on async io then we would
  706                          * have PG_RELEASED set, clear that with the others.
  707                          */
  708                         atomic_clearbits_int(&ppsp[lcv]->pg_flags,
  709                             PG_BUSY|PG_WANTED|PG_FAKE|PG_RELEASED);
  710                         UVM_PAGE_OWN(ppsp[lcv], NULL);
  711                 }
  712 
  713                 /*
  714                  * if we are operating on behalf of the pagedaemon and we 
  715                  * had a successful pageout update the page!
  716                  */
  717                 if (flags & PGO_PDFREECLUST) {
  718                         pmap_clear_reference(ppsp[lcv]);
  719                         pmap_clear_modify(ppsp[lcv]);
  720                         atomic_setbits_int(&ppsp[lcv]->pg_flags, PG_CLEAN);
  721                 }
  722 
   723                 /* if swap-backed, unlock the anon or object and move on */
  724                 if (!uobj) {
  725                         if (ppsp[lcv]->pg_flags & PQ_ANON)
  726                                 rw_exit(ppsp[lcv]->uanon->an_lock);
  727                         else
  728                                 rw_exit(ppsp[lcv]->uobject->vmobjlock);
  729                 }
  730         }
  731 }
  732 
  733 /*
  734  * interrupt-context iodone handler for single-buf i/os
  735  * or the top-level buf of a nested-buf i/o.
  736  *
  737  * => must be at splbio().
  738  */
  739 
  740 void
  741 uvm_aio_biodone(struct buf *bp)
  742 {
  743         splassert(IPL_BIO);
  744 
  745         /* reset b_iodone for when this is a single-buf i/o. */
  746         bp->b_iodone = uvm_aio_aiodone;
  747 
  748         mtx_enter(&uvm.aiodoned_lock);
  749         TAILQ_INSERT_TAIL(&uvm.aio_done, bp, b_freelist);
  750         wakeup(&uvm.aiodoned);
  751         mtx_leave(&uvm.aiodoned_lock);
  752 }
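
/*
 * Not part of the original file: uvm_aio_biodone() only queues the buf.
 * The consumer (the aiodone daemon in uvm_pdaemon.c) sleeps on
 * &uvm.aiodoned, dequeues bufs from uvm.aio_done under uvm.aiodoned_lock,
 * and calls each buf's b_iodone hook in thread context, which for
 * single-buf i/os is the uvm_aio_aiodone() installed above.
 */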
  753 
  754 void
  755 uvm_aio_aiodone_pages(struct vm_page **pgs, int npages, boolean_t write,
  756     int error)
  757 {
  758         struct vm_page *pg;
  759         struct uvm_object *uobj;
  760         boolean_t swap;
  761         int i;
  762 
  763         uobj = NULL;
  764 
  765         for (i = 0; i < npages; i++) {
  766                 pg = pgs[i];
  767 
  768                 if (i == 0) {
  769                         swap = (pg->pg_flags & PQ_SWAPBACKED) != 0;
  770                         if (!swap) {
  771                                 uobj = pg->uobject;
  772                                 rw_enter(uobj->vmobjlock, RW_WRITE);
  773                         }
  774                 }
  775                 KASSERT(swap || pg->uobject == uobj);
  776 
  777                 /*
  778                  * if this is a read and we got an error, mark the pages
  779                  * PG_RELEASED so that uvm_page_unbusy() will free them.
  780                  */
  781                 if (!write && error) {
  782                         atomic_setbits_int(&pg->pg_flags, PG_RELEASED);
  783                         continue;
  784                 }
  785                 KASSERT(!write || (pgs[i]->pg_flags & PG_FAKE) == 0);
  786 
  787                 /*
  788                  * if this is a read and the page is PG_FAKE,
  789                  * or this was a successful write,
  790                  * mark the page PG_CLEAN and not PG_FAKE.
  791                  */
  792                 if ((pgs[i]->pg_flags & PG_FAKE) || (write && error != ENOMEM)) {
  793                         pmap_clear_reference(pgs[i]);
  794                         pmap_clear_modify(pgs[i]);
  795                         atomic_setbits_int(&pgs[i]->pg_flags, PG_CLEAN);
  796                         atomic_clearbits_int(&pgs[i]->pg_flags, PG_FAKE);
  797                 }
  798         }
  799         uvm_page_unbusy(pgs, npages);
  800         if (!swap) {
  801                 rw_exit(uobj->vmobjlock);
  802         }
  803 }
  804 
  805 /*
  806  * uvm_aio_aiodone: do iodone processing for async i/os.
  807  * this should be called in thread context, not interrupt context.
  808  */
  809 void
  810 uvm_aio_aiodone(struct buf *bp)
  811 {
  812         int npages = bp->b_bufsize >> PAGE_SHIFT;
  813         struct vm_page *pgs[MAXPHYS >> PAGE_SHIFT];
  814         int i, error;
  815         boolean_t write;
  816 
  817         KASSERT(npages <= MAXPHYS >> PAGE_SHIFT);
  818         splassert(IPL_BIO);
  819 
  820         error = (bp->b_flags & B_ERROR) ? (bp->b_error ? bp->b_error : EIO) : 0;
  821         write = (bp->b_flags & B_READ) == 0;
  822 
  823         for (i = 0; i < npages; i++)
  824                 pgs[i] = uvm_atopg((vaddr_t)bp->b_data +
  825                     ((vsize_t)i << PAGE_SHIFT));
  826         uvm_pagermapout((vaddr_t)bp->b_data, npages);
  827 #ifdef UVM_SWAP_ENCRYPT
  828         /*
  829          * XXX - assumes that we only get ASYNC writes. used to be above.
  830          */
  831         if (pgs[0]->pg_flags & PQ_ENCRYPT) {
  832                 uvm_swap_freepages(pgs, npages);
  833                 goto freed;
  834         }
  835 #endif /* UVM_SWAP_ENCRYPT */
  836 
  837         uvm_aio_aiodone_pages(pgs, npages, write, error);
  838 
  839 #ifdef UVM_SWAP_ENCRYPT
  840 freed:
  841 #endif
  842         pool_put(&bufpool, bp);
  843 }

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.