sys/uvm/uvm_pdaemon.c
/*	$NetBSD: uvm_pdaemon.c,v 1.93.4.2 2009/02/02 19:24:04 snj Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Charles D. Cranor,
 *      Washington University, the University of California, Berkeley and
 *      its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c        8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdaemon.c,v 1.93.4.2 2009/02/02 19:24:04 snj Exp $");

#include "opt_uvmhist.h"
#include "opt_readahead.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/buf.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>

/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full.  the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced and avoid being freed.
 */

#define	UVMPD_NUMDIRTYREACTS	16

#define	UVMPD_NUMTRYLOCKOWNER	16

/*
 * local prototypes
 */

static void	uvmpd_scan(void);
static void	uvmpd_scan_queue(void);
static void	uvmpd_tune(void);

unsigned int uvm_pagedaemon_waiters;

/*
 * XXX hack to avoid hangs when large processes fork.
 */
u_int uvm_extrapages;

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */

void
uvm_wait(const char *wmsg)
{
	int timo = 0;

	mutex_spin_enter(&uvm_fpageqlock);

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */

	if (curlwp == uvm.pagedaemon_lwp && uvmexp.paging == 0) {
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.  but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = hz >> 3;		/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	uvm_pagedaemon_waiters++;
	wakeup(&uvm.pagedaemon);		/* wake the daemon! */
	UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm_fpageqlock, false, wmsg, timo);
}
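
/*
 * Example (illustrative sketch, hypothetical function name): callers
 * typically retry a failed page allocation around uvm_wait(), with no
 * locks held, sleeping until the pagedaemon has freed some memory:
 */
#if 0	/* example only */
static struct vm_page *
example_alloc_page(void)
{
	struct vm_page *pg;

	for (;;) {
		pg = uvm_pagealloc(NULL, 0, NULL, 0);
		if (pg != NULL)
			break;		/* got a page */
		uvm_wait("exwait");	/* sleep until pagedaemon frees memory */
	}
	return pg;
}
#endif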

/*
 * uvm_kick_pdaemon: perform checks to determine if we need to
 * give the pagedaemon a nudge, and do so if necessary.
 *
 * => called with uvm_fpageqlock held.
 */

void
uvm_kick_pdaemon(void)
{

	KASSERT(mutex_owned(&uvm_fpageqlock));

	if (uvmexp.free + uvmexp.paging < uvmexp.freemin ||
	    (uvmexp.free + uvmexp.paging < uvmexp.freetarg &&
	     uvmpdpol_needsscan_p())) {
		wakeup(&uvm.pagedaemon);
	}
}

/*
 * uvmpd_tune: tune paging parameters
 *
 * => called whenever memory is added to (or removed from?) the system
 * => caller must call with page queues locked
 */

static void
uvmpd_tune(void)
{
	int val;

	UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);

	/*
	 * try to keep 0.5% of available RAM free, but limit to between
	 * 128k and 1024k per-CPU.  XXX: what are these values good for?
	 */
	val = uvmexp.npages / 200;
	val = MAX(val, (128*1024) >> PAGE_SHIFT);
	val = MIN(val, (1024*1024) >> PAGE_SHIFT);
	val *= ncpu;

	/* Make sure there's always a user page free. */
	if (val < uvmexp.reserve_kernel + 1)
		val = uvmexp.reserve_kernel + 1;
	uvmexp.freemin = val;

	/* Calculate free target. */
	val = (uvmexp.freemin * 4) / 3;
	if (val <= uvmexp.freemin)
		val = uvmexp.freemin + 1;
	uvmexp.freetarg = val + atomic_swap_uint(&uvm_extrapages, 0);

	uvmexp.wiredmax = uvmexp.npages / 3;
	UVMHIST_LOG(pdhist, "<- done, freemin=%d, freetarg=%d, wiredmax=%d",
	    uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
}
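
/*
 * Worked example (illustrative, hypothetical machine): with 4 GiB of RAM,
 * 4 KiB pages (PAGE_SHIFT == 12) and 2 CPUs, the arithmetic above gives:
 *
 *	npages   = 4 GiB / 4 KiB  = 1048576
 *	val      = 1048576 / 200  = 5242	(0.5% of RAM)
 *	val      = MAX(5242, 32)  = 5242	(128k floor == 32 pages)
 *	val      = MIN(5242, 256) = 256		(1024k ceiling == 256 pages)
 *	val     *= 2 (ncpu)	  = 512		-> freemin = 512 pages (2 MiB)
 *	freetarg = (512 * 4) / 3  = 682 pages	(plus any uvm_extrapages)
 */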

/*
 * uvm_pageout: the main loop for the pagedaemon
 */

void
uvm_pageout(void *arg)
{
	int bufcnt, npages = 0;
	int extrapages = 0;
	struct pool *pp;
	uint64_t where;
	UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);

	UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);

	/*
	 * ensure correct priority and set paging parameters...
	 */

	uvm.pagedaemon_lwp = curlwp;
	mutex_enter(&uvm_pageqlock);
	npages = uvmexp.npages;
	uvmpd_tune();
	mutex_exit(&uvm_pageqlock);

	/*
	 * main loop
	 */

	for (;;) {
		bool needsscan, needsfree;

		mutex_spin_enter(&uvm_fpageqlock);
		if (uvm_pagedaemon_waiters == 0 || uvmexp.paging > 0) {
			UVMHIST_LOG(pdhist,"  <<SLEEPING>>",0,0,0,0);
			UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
			    &uvm_fpageqlock, false, "pgdaemon", 0);
			uvmexp.pdwoke++;
			UVMHIST_LOG(pdhist,"  <<WOKE UP>>",0,0,0,0);
		} else {
			mutex_spin_exit(&uvm_fpageqlock);
		}

		/*
		 * now lock page queues and recompute inactive count
		 */

		mutex_enter(&uvm_pageqlock);
		if (npages != uvmexp.npages || extrapages != uvm_extrapages) {
			npages = uvmexp.npages;
			extrapages = uvm_extrapages;
			mutex_spin_enter(&uvm_fpageqlock);
			uvmpd_tune();
			mutex_spin_exit(&uvm_fpageqlock);
		}

		uvmpdpol_tune();

		/*
		 * Estimate a hint for buf_drain() below.  Note that bufmem
		 * is returned to the system only when an entire pool page
		 * is empty.
		 */
		mutex_spin_enter(&uvm_fpageqlock);
		bufcnt = uvmexp.freetarg - uvmexp.free;
		if (bufcnt < 0)
			bufcnt = 0;

		UVMHIST_LOG(pdhist,"  free/ftarg=%d/%d",
		    uvmexp.free, uvmexp.freetarg, 0,0);

		needsfree = uvmexp.free + uvmexp.paging < uvmexp.freetarg;
		needsscan = needsfree || uvmpdpol_needsscan_p();

		/*
		 * scan if needed
		 */
		if (needsscan) {
			mutex_spin_exit(&uvm_fpageqlock);
			uvmpd_scan();
			mutex_spin_enter(&uvm_fpageqlock);
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */
		if (uvmexp.free > uvmexp.reserve_kernel ||
		    uvmexp.paging == 0) {
			wakeup(&uvmexp.free);
			uvm_pagedaemon_waiters = 0;
		}
		mutex_spin_exit(&uvm_fpageqlock);

		/*
		 * scan done.  unlock page queues (the only lock we are holding)
		 */
		mutex_exit(&uvm_pageqlock);

		/*
		 * if we don't need free memory, we're done.
		 */

		if (!needsfree)
			continue;

		/*
		 * start draining pool resources now that we're not
		 * holding any locks.
		 */
		pool_drain_start(&pp, &where);

		/*
		 * kill unused metadata buffers.
		 */
		mutex_enter(&bufcache_lock);
		buf_drain(bufcnt << PAGE_SHIFT);
		mutex_exit(&bufcache_lock);

		/*
		 * complete draining the pools.
		 */
		pool_drain_end(pp, where);
	}
	/*NOTREACHED*/
}

/*
 * uvm_aiodone_worker: a workqueue callback for the aiodone daemon.
 */

void
uvm_aiodone_worker(struct work *wk, void *dummy)
{
	struct buf *bp = (void *)wk;

	KASSERT(&bp->b_work == wk);

	/*
	 * process an i/o that's done.
	 */

	(*bp->b_iodone)(bp);
}

void
uvm_pageout_start(int npages)
{

	mutex_spin_enter(&uvm_fpageqlock);
	uvmexp.paging += npages;
	mutex_spin_exit(&uvm_fpageqlock);
}

void
uvm_pageout_done(int npages)
{

	mutex_spin_enter(&uvm_fpageqlock);
	KASSERT(uvmexp.paging >= npages);
	uvmexp.paging -= npages;

	/*
	 * wake up either the pagedaemon or the LWPs waiting for it.
	 */

	if (uvmexp.free <= uvmexp.reserve_kernel) {
		wakeup(&uvm.pagedaemon);
	} else {
		wakeup(&uvmexp.free);
		uvm_pagedaemon_waiters = 0;
	}
	mutex_spin_exit(&uvm_fpageqlock);
}
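
/*
 * Example (illustrative sketch; example_start_io is a hypothetical
 * helper): the intended pairing of uvm_pageout_start() and
 * uvm_pageout_done() around asynchronous pageout i/o, as done by
 * swapcluster_flush() below, with completion arriving via the aiodone
 * path (cf. uvm_aiodone_worker() above):
 */
#if 0	/* example only */
static int example_start_io(struct vm_page **, int);	/* hypothetical */

static void
example_async_pageout(struct vm_page **pgs, int npages)
{
	uvm_pageout_start(npages);	/* pages now counted in uvmexp.paging */
	if (example_start_io(pgs, npages) != 0) {
		/* the i/o never started, so undo the accounting here */
		uvm_pageout_done(npages);
	}
	/* on success, the i/o completion handler calls uvm_pageout_done() */
}
#endif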

/*
 * uvmpd_trylockowner: trylock the page's owner.
 *
 * => called with pageq locked.
 * => resolve orphaned O->A loaned page.
 * => return the locked mutex on success.  otherwise, return NULL.
 */

kmutex_t *
uvmpd_trylockowner(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;
	kmutex_t *slock;

	KASSERT(mutex_owned(&uvm_pageqlock));

	if (uobj != NULL) {
		slock = &uobj->vmobjlock;
	} else {
		struct vm_anon *anon = pg->uanon;

		KASSERT(anon != NULL);
		slock = &anon->an_lock;
	}

	if (!mutex_tryenter(slock)) {
		return NULL;
	}

	if (uobj == NULL) {

		/*
		 * set PQ_ANON if it isn't set already.
		 */

		if ((pg->pqflags & PQ_ANON) == 0) {
			KASSERT(pg->loan_count > 0);
			pg->loan_count--;
			pg->pqflags |= PQ_ANON;
			/* anon now owns it */
		}
	}

	return slock;
}
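
/*
 * Example (illustrative sketch, hypothetical function name): the trylock
 * pattern a scanner uses with uvmpd_trylockowner() to respect the
 * pageq -> owner lock order, as in uvmpd_trydropswap() and
 * uvmpd_scan_queue() below:
 */
#if 0	/* example only */
static void
example_visit_page(struct vm_page *pg)
{
	kmutex_t *slock;

	KASSERT(mutex_owned(&uvm_pageqlock));
	slock = uvmpd_trylockowner(pg);
	if (slock == NULL)
		return;		/* owner lock busy; skip rather than deadlock */
	if ((pg->flags & PG_BUSY) == 0) {
		/* ... examine or page out pg here ... */
	}
	mutex_exit(slock);
}
#endif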

#if defined(VMSWAP)
struct swapcluster {
	int swc_slot;
	int swc_nallocated;
	int swc_nused;
	struct vm_page *swc_pages[howmany(MAXPHYS, MIN_PAGE_SIZE)];
};

static void
swapcluster_init(struct swapcluster *swc)
{

	swc->swc_slot = 0;
	swc->swc_nused = 0;
}

static int
swapcluster_allocslots(struct swapcluster *swc)
{
	int slot;
	int npages;

	if (swc->swc_slot != 0) {
		return 0;
	}

	/* even with a strange MAXPHYS, the shift implicitly rounds down to a page */
	npages = MAXPHYS >> PAGE_SHIFT;
	slot = uvm_swap_alloc(&npages, true);
	if (slot == 0) {
		return ENOMEM;
	}
	swc->swc_slot = slot;
	swc->swc_nallocated = npages;
	swc->swc_nused = 0;

	return 0;
}

static int
swapcluster_add(struct swapcluster *swc, struct vm_page *pg)
{
	int slot;
	struct uvm_object *uobj;

	KASSERT(swc->swc_slot != 0);
	KASSERT(swc->swc_nused < swc->swc_nallocated);
	KASSERT((pg->pqflags & PQ_SWAPBACKED) != 0);

	slot = swc->swc_slot + swc->swc_nused;
	uobj = pg->uobject;
	if (uobj == NULL) {
		KASSERT(mutex_owned(&pg->uanon->an_lock));
		pg->uanon->an_swslot = slot;
	} else {
		int result;

		KASSERT(mutex_owned(&uobj->vmobjlock));
		result = uao_set_swslot(uobj, pg->offset >> PAGE_SHIFT, slot);
		if (result == -1) {
			return ENOMEM;
		}
	}
	swc->swc_pages[swc->swc_nused] = pg;
	swc->swc_nused++;

	return 0;
}

static void
swapcluster_flush(struct swapcluster *swc, bool now)
{
	int slot;
	int nused;
	int nallocated;
	int error;

	if (swc->swc_slot == 0) {
		return;
	}
	KASSERT(swc->swc_nused <= swc->swc_nallocated);

	slot = swc->swc_slot;
	nused = swc->swc_nused;
	nallocated = swc->swc_nallocated;

	/*
	 * if this is the final pageout we could have a few
	 * unused swap blocks.  if so, free them now.
	 */

	if (nused < nallocated) {
		if (!now) {
			return;
		}
		uvm_swap_free(slot + nused, nallocated - nused);
	}

	/*
	 * now start the pageout.
	 */

	if (nused > 0) {
		uvmexp.pdpageouts++;
		uvm_pageout_start(nused);
		error = uvm_swap_put(slot, swc->swc_pages, nused, 0);
		KASSERT(error == 0 || error == ENOMEM);
	}

	/*
	 * zero swslot to indicate that we are
	 * no longer building a swap-backed cluster.
	 */

	swc->swc_slot = 0;
	swc->swc_nused = 0;
}

static int
swapcluster_nused(struct swapcluster *swc)
{

	return swc->swc_nused;
}
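
/*
 * Example (illustrative sketch, hypothetical caller): the swapcluster
 * lifecycle as driven by uvmpd_scan_queue() below.  Each page is assumed
 * to be PQ_SWAPBACKED, owner-locked, and already marked PG_BUSY/PG_PAGEOUT
 * by the caller:
 */
#if 0	/* example only */
static void
example_cluster_pageout(struct vm_page **pgs, int n)
{
	struct swapcluster swc;
	int i;

	swapcluster_init(&swc);
	for (i = 0; i < n; i++) {
		if (swapcluster_allocslots(&swc) != 0)
			break;			/* swap is full */
		if (swapcluster_add(&swc, pgs[i]) != 0)
			continue;		/* no slot mapping; skip page */
		swapcluster_flush(&swc, false);	/* writes only a full cluster */
	}
	swapcluster_flush(&swc, true);		/* final, possibly partial, write */
}
#endif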

/*
 * uvmpd_dropswap: free any swap allocated to this page.
 *
 * => called with owner locked.
 * => return true if a page had an associated slot.
 */

static bool
uvmpd_dropswap(struct vm_page *pg)
{
	bool result = false;
	struct vm_anon *anon = pg->uanon;

	if ((pg->pqflags & PQ_ANON) && anon->an_swslot) {
		uvm_swap_free(anon->an_swslot, 1);
		anon->an_swslot = 0;
		pg->flags &= ~PG_CLEAN;
		result = true;
	} else if (pg->pqflags & PQ_AOBJ) {
		int slot = uao_set_swslot(pg->uobject,
		    pg->offset >> PAGE_SHIFT, 0);
		if (slot) {
			uvm_swap_free(slot, 1);
			pg->flags &= ~PG_CLEAN;
			result = true;
		}
	}

	return result;
}

/*
 * uvmpd_trydropswap: try to free any swap allocated to this page.
 *
 * => return true if a slot is successfully freed.
 */

bool
uvmpd_trydropswap(struct vm_page *pg)
{
	kmutex_t *slock;
	bool result;

	if ((pg->flags & PG_BUSY) != 0) {
		return false;
	}

	/*
	 * lock the page's owner.
	 */

	slock = uvmpd_trylockowner(pg);
	if (slock == NULL) {
		return false;
	}

	/*
	 * skip this page if it's busy.
	 */

	if ((pg->flags & PG_BUSY) != 0) {
		mutex_exit(slock);
		return false;
	}

	result = uvmpd_dropswap(pg);

	mutex_exit(slock);

	return result;
}

#endif /* defined(VMSWAP) */

/*
 * uvmpd_scan_queue: scan the replace-candidate list for pages
 * to clean or free.
 *
 * => called with page queues locked
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 */

static void
uvmpd_scan_queue(void)
{
	struct vm_page *p;
	struct uvm_object *uobj;
	struct vm_anon *anon;
#if defined(VMSWAP)
	struct swapcluster swc;
#endif /* defined(VMSWAP) */
	int dirtyreacts;
	int lockownerfail;
	kmutex_t *slock;
	UVMHIST_FUNC("uvmpd_scan_queue"); UVMHIST_CALLED(pdhist);

	/*
	 * swc_slot is non-zero if we are building a swap cluster.  we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */

#if defined(VMSWAP)
	swapcluster_init(&swc);
#endif /* defined(VMSWAP) */

	dirtyreacts = 0;
	lockownerfail = 0;
	uvmpdpol_scaninit();

	while (/* CONSTCOND */ 1) {

		/*
		 * see if we've met the free target.
		 */

		if (uvmexp.free + uvmexp.paging
#if defined(VMSWAP)
		    + swapcluster_nused(&swc)
#endif /* defined(VMSWAP) */
		    >= uvmexp.freetarg << 2 ||
		    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
			UVMHIST_LOG(pdhist,"  met free target: "
			    "exit loop", 0, 0, 0, 0);
			break;
		}

		p = uvmpdpol_selectvictim();
		if (p == NULL) {
			break;
		}
		KASSERT(uvmpdpol_pageisqueued_p(p));
		KASSERT(p->wire_count == 0);

		/*
		 * we are below target and have a new page to consider.
		 */

		anon = p->uanon;
		uobj = p->uobject;

		/*
		 * first we attempt to lock the object that this page
		 * belongs to.  if our attempt fails we skip on to
		 * the next page (no harm done).  it is important to
		 * "try" locking the object as we are locking in the
		 * wrong order (pageq -> object) and we don't want to
		 * deadlock.
		 *
		 * the only time we expect to see an ownerless page
		 * (i.e. a page with no uobject and !PQ_ANON) is if an
		 * anon has loaned a page from a uvm_object and the
		 * uvm_object has dropped the ownership.  in that
		 * case, the anon can "take over" the loaned page
		 * and make it its own.
		 */

		slock = uvmpd_trylockowner(p);
		if (slock == NULL) {
			/*
			 * yield the CPU to give an LWP holding the lock
			 * a chance to run.  otherwise we can busy-loop
			 * for too long if the page queue is filled with
			 * many pages from only a few objects.
			 */
			lockownerfail++;
			if (lockownerfail > UVMPD_NUMTRYLOCKOWNER) {
				mutex_exit(&uvm_pageqlock);
				/* XXX Better than yielding but inadequate. */
				kpause("livelock", false, 1, NULL);
				mutex_enter(&uvm_pageqlock);
				lockownerfail = 0;
			}
			continue;
		}
		if (p->flags & PG_BUSY) {
			mutex_exit(slock);
			uvmexp.pdbusy++;
			continue;
		}

		/* does the page belong to an object? */
		if (uobj != NULL) {
			uvmexp.pdobscan++;
		} else {
#if defined(VMSWAP)
			KASSERT(anon != NULL);
			uvmexp.pdanscan++;
#else /* defined(VMSWAP) */
			panic("%s: anon", __func__);
#endif /* defined(VMSWAP) */
		}

		/*
		 * we now have the object and the page queues locked.
		 * if the page is not swap-backed, call the object's
		 * pager to flush and free the page.
		 */

#if defined(READAHEAD_STATS)
		if ((p->pqflags & PQ_READAHEAD) != 0) {
			p->pqflags &= ~PQ_READAHEAD;
			uvm_ra_miss.ev_count++;
		}
#endif /* defined(READAHEAD_STATS) */

		if ((p->pqflags & PQ_SWAPBACKED) == 0) {
			KASSERT(uobj != NULL);
			mutex_exit(&uvm_pageqlock);
			(void) (uobj->pgops->pgo_put)(uobj, p->offset,
			    p->offset + PAGE_SIZE, PGO_CLEANIT|PGO_FREE);
			mutex_enter(&uvm_pageqlock);
			continue;
		}

		/*
		 * the page is swap-backed.  remove all the permissions
		 * from the page so we can sync the modified info
		 * without any race conditions.  if the page is clean
		 * we can free it now and continue.
		 */

		pmap_page_protect(p, VM_PROT_NONE);
		if ((p->flags & PG_CLEAN) && pmap_clear_modify(p)) {
			p->flags &= ~(PG_CLEAN);
		}
		if (p->flags & PG_CLEAN) {
			int slot;
			int pageidx;

			pageidx = p->offset >> PAGE_SHIFT;
			uvm_pagefree(p);
			uvmexp.pdfreed++;

			/*
			 * for anons, we need to remove the page
			 * from the anon ourselves.  for aobjs,
			 * pagefree did that for us.
			 */

			if (anon) {
				KASSERT(anon->an_swslot != 0);
				anon->an_page = NULL;
				slot = anon->an_swslot;
			} else {
				slot = uao_find_swslot(uobj, pageidx);
			}
			mutex_exit(slock);

			if (slot > 0) {
				/* this page is now only in swap. */
				mutex_enter(&uvm_swap_data_lock);
				KASSERT(uvmexp.swpgonly < uvmexp.swpginuse);
				uvmexp.swpgonly++;
				mutex_exit(&uvm_swap_data_lock);
			}
			continue;
		}

#if defined(VMSWAP)
		/*
		 * this page is dirty, skip it if we'll have met our
		 * free target when all the current pageouts complete.
		 */

		if (uvmexp.free + uvmexp.paging > uvmexp.freetarg << 2) {
			mutex_exit(slock);
			continue;
		}

		/*
		 * free any swap space allocated to the page since
		 * we'll have to write it again with its new data.
		 */

		uvmpd_dropswap(p);

		/*
		 * start new swap pageout cluster (if necessary).
		 *
		 * if swap is full reactivate this page so that
		 * we eventually cycle all pages through the
		 * inactive queue.
		 */

		if (swapcluster_allocslots(&swc)) {
			dirtyreacts++;
			uvm_pageactivate(p);
			mutex_exit(slock);
			continue;
		}

		/*
		 * at this point, we're definitely going to reuse this
		 * page.  mark the page busy and delayed-free.
		 * we should remove the page from the page queues
		 * so we don't ever look at it again.
		 * adjust counters and such.
		 */

		p->flags |= PG_BUSY;
		UVM_PAGE_OWN(p, "scan_queue");

		p->flags |= PG_PAGEOUT;
		uvm_pagedequeue(p);

		uvmexp.pgswapout++;
		mutex_exit(&uvm_pageqlock);

		/*
		 * add the new page to the cluster.
		 */

		if (swapcluster_add(&swc, p)) {
			p->flags &= ~(PG_BUSY|PG_PAGEOUT);
			UVM_PAGE_OWN(p, NULL);
			mutex_enter(&uvm_pageqlock);
			dirtyreacts++;
			uvm_pageactivate(p);
			mutex_exit(slock);
			continue;
		}
		mutex_exit(slock);

		swapcluster_flush(&swc, false);
		mutex_enter(&uvm_pageqlock);

		/*
		 * the pageout is in progress.  bump counters and set up
		 * for the next loop.
		 */

		uvmexp.pdpending++;

#else /* defined(VMSWAP) */
		uvm_pageactivate(p);
		mutex_exit(slock);
#endif /* defined(VMSWAP) */
	}

#if defined(VMSWAP)
	mutex_exit(&uvm_pageqlock);
	swapcluster_flush(&swc, true);
	mutex_enter(&uvm_pageqlock);
#endif /* defined(VMSWAP) */
}

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 *
 * => called with pageq's locked
 */

static void
uvmpd_scan(void)
{
	int swap_shortage, pages_freed;
	UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);

	uvmexp.pdrevs++;

	/*
	 * work on meeting our targets.  first we work on our free target
	 * by converting inactive pages into free pages.  then we work on
	 * meeting our inactive target by converting active pages to
	 * inactive ones.
	 */

	UVMHIST_LOG(pdhist, "  starting 'free' loop",0,0,0,0);

	pages_freed = uvmexp.pdfreed;
	uvmpd_scan_queue();
	pages_freed = uvmexp.pdfreed - pages_freed;

	/*
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */

	swap_shortage = 0;
	if (uvmexp.free < uvmexp.freetarg &&
	    uvmexp.swpginuse >= uvmexp.swpgavail &&
	    !uvm_swapisfull() &&
	    pages_freed == 0) {
		swap_shortage = uvmexp.freetarg - uvmexp.free;
	}

	uvmpdpol_balancequeue(swap_shortage);

	/*
	 * swap out some processes if we are still below the minimum
	 * free target.  we need to unlock the page queues for this.
	 */

	if (uvmexp.free < uvmexp.freemin && uvmexp.nswapdev != 0 &&
	    uvm.swapout_enabled) {
		uvmexp.pdswout++;
		UVMHIST_LOG(pdhist,"  free %d < min %d: swapout",
		    uvmexp.free, uvmexp.freemin, 0, 0);
		mutex_exit(&uvm_pageqlock);
		uvm_swapout_threads();
		mutex_enter(&uvm_pageqlock);
	}
}

/*
 * uvm_reclaimable: decide whether to wait for pagedaemon.
 *
 * => return true if it seems to be worth doing uvm_wait.
 *
 * XXX should be tunable.
 * XXX should consider pools, etc?
 */

bool
uvm_reclaimable(void)
{
	int filepages;
	int active, inactive;

	/*
	 * if swap is not full, no problem.
	 */

	if (!uvm_swapisfull()) {
		return true;
	}

	/*
	 * file-backed pages can be reclaimed even when swap is full.
	 * if we have more than 1/16 of pageable memory or 5MB, try to reclaim.
	 *
	 * XXX assume the worst case, ie. all wired pages are file-backed.
	 *
	 * XXX should consider other reclaimable memory,
	 * XXX ie. pools, traditional buffer cache.
	 */

	filepages = uvmexp.filepages + uvmexp.execpages - uvmexp.wired;
	uvm_estimatepageable(&active, &inactive);
	if (filepages >= MIN((active + inactive) >> 4,
	    5 * 1024 * 1024 >> PAGE_SHIFT)) {
		return true;
	}

	/*
	 * kill the process, fail allocation, etc.
	 */

	return false;
}
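
/*
 * Worked example (illustrative, hypothetical numbers): with 4 KiB pages
 * the 5MB term above is 5 * 1024 * 1024 >> 12 = 1280 pages.  With, say,
 * 64000 pageable pages the 1/16 term is 64000 >> 4 = 4000 pages, so the
 * effective threshold would be MIN(4000, 1280) = 1280 file-backed pages.
 */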

void
uvm_estimatepageable(int *active, int *inactive)
{

	uvmpdpol_estimatepageable(active, inactive);
}