sys/uvm/uvm_pdaemon.c
/*	$OpenBSD: uvm_pdaemon.c,v 1.105 2022/09/10 20:35:29 miod Exp $	*/
/*	$NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $	*/

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * uvm_pdaemon.c: the page daemon
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/atomic.h>

#ifdef HIBERNATE
#include <sys/hibernate.h>
#endif

#include <uvm/uvm.h>

#include "drm.h"

#if NDRM > 0
extern void drmbackoff(long);
#endif
/*
 * UVMPD_NUMDIRTYREACTS is how many dirty pages the pagedaemon will reactivate
 * in a pass thru the inactive list when swap is full.  the value should be
 * "small"... if it's too large we'll cycle the active pages thru the inactive
 * queue too quickly for them to be referenced (and thus avoid being freed).
 */

#define UVMPD_NUMDIRTYREACTS	16

/*
 * local prototypes
 */

struct rwlock	*uvmpd_trylockowner(struct vm_page *);
void		uvmpd_scan(struct uvm_pmalloc *);
void		uvmpd_scan_inactive(struct uvm_pmalloc *, struct pglist *);
void		uvmpd_tune(void);
void		uvmpd_drop(struct pglist *);
void		uvmpd_dropswap(struct vm_page *);

/*
 * uvm_wait: wait (sleep) for the page daemon to free some pages
 *
 * => should be called with all locks released
 * => should _not_ be called by the page daemon (to avoid deadlock)
 */

void
uvm_wait(const char *wmsg)
{
	uint64_t timo = INFSLP;

#ifdef DIAGNOSTIC
	if (curproc == &proc0)
		panic("%s: cannot sleep for memory during boot", __func__);
#endif

	/*
	 * check for page daemon going to sleep (waiting for itself)
	 */
	if (curproc == uvm.pagedaemon_proc) {
		printf("uvm_wait emergency bufbackoff\n");
		if (bufbackoff(NULL, 4) == 0)
			return;
		/*
		 * now we have a problem: the pagedaemon wants to go to
		 * sleep until it frees more memory.  but how can it
		 * free more memory if it is asleep?  that is a deadlock.
		 * we have two options:
		 *  [1] panic now
		 *  [2] put a timeout on the sleep, thus causing the
		 *      pagedaemon to only pause (rather than sleep forever)
		 *
		 * note that option [2] will only help us if we get lucky
		 * and some other process on the system breaks the deadlock
		 * by exiting or freeing memory (thus allowing the pagedaemon
		 * to continue).  for now we panic if DEBUG is defined,
		 * otherwise we hope for the best with option [2] (better
		 * yet, this should never happen in the first place!).
		 */

		printf("pagedaemon: deadlock detected!\n");
		timo = MSEC_TO_NSEC(125);	/* set timeout */
#if defined(DEBUG)
		/* DEBUG: panic so we can debug it */
		panic("pagedaemon deadlock");
#endif
	}

	uvm_lock_fpageq();
	wakeup(&uvm.pagedaemon);		/* wake the daemon! */
	msleep_nsec(&uvmexp.free, &uvm.fpageqlock, PVM | PNORELOCK, wmsg, timo);
}
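
/*
 * Illustrative sketch (not part of this file): callers elsewhere in the
 * kernel typically consume uvm_wait() as a retry loop around a failed
 * page allocation: drop your locks, sleep until the daemon reclaims
 * memory, retry.  The function name and wait-channel string below are
 * hypothetical, for illustration only.
 */
#if 0	/* example only, not built */
struct vm_page *
uvm_wait_alloc_example(void)
{
	struct vm_page *pg;

	/* keep retrying; uvm_wait() blocks until pages are reclaimed */
	while ((pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO)) == NULL)
		uvm_wait("examplewait");
	return pg;
}
#endif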

/*
 * uvmpd_tune: tune paging parameters
 *
 * => called whenever memory is added to (or removed from?) the system
 * => caller must call with page queues locked
 */

void
uvmpd_tune(void)
{
	uvmexp.freemin = uvmexp.npages / 30;

	/* between 16k and 512k */
	/* XXX:  what are these values good for? */
	uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT);
#if 0
	uvmexp.freemin = min(uvmexp.freemin, (512*1024) >> PAGE_SHIFT);
#endif

	/* Make sure there's always a user page free. */
	if (uvmexp.freemin < uvmexp.reserve_kernel + 1)
		uvmexp.freemin = uvmexp.reserve_kernel + 1;

	uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
	if (uvmexp.freetarg <= uvmexp.freemin)
		uvmexp.freetarg = uvmexp.freemin + 1;

	/* uvmexp.inactarg: computed in main daemon loop */

	uvmexp.wiredmax = uvmexp.npages / 3;
}
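
/*
 * Worked example of the tuning math above (illustrative only; assumes a
 * hypothetical machine with 4 KiB pages and 1 GiB of RAM, so
 * npages = 262144, and a reserve_kernel small enough not to matter):
 *
 *	freemin  = 262144 / 30                 = 8738 pages (~34 MiB)
 *	freemin  = max(8738, 16384 >> 12 = 4)  = 8738
 *	freetarg = (8738 * 4) / 3              = 11650 pages (~45 MiB)
 *	         (11650 > 8738, so the +1 adjustment does not fire)
 *	wiredmax = 262144 / 3                  = 87381 pages
 */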

/*
 * Indicate to the page daemon that a nowait call failed and it should
 * recover at least some memory in the most restricted region (assumed
 * to be dma_constraint).
 */
volatile int uvm_nowait_failed;
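
/*
 * Illustrative sketch (not part of this file): a nowait allocation path
 * that comes up empty is expected to set uvm_nowait_failed and kick the
 * daemon, roughly as below.  The function name is hypothetical; the real
 * producer lives in the page allocator.
 */
#if 0	/* example only, not built */
void
uvm_nowait_fail_example(void)
{
	uvm_nowait_failed = 1;
	wakeup(&uvm.pagedaemon);	/* rouse uvm_pageout() */
}
#endif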

/*
 * uvm_pageout: the main loop for the pagedaemon
 */
void
uvm_pageout(void *arg)
{
	struct uvm_constraint_range constraint;
	struct uvm_pmalloc *pma;
	int npages = 0;

	/* ensure correct priority and set paging parameters... */
	uvm.pagedaemon_proc = curproc;
	(void) spl0();
	uvm_lock_pageq();
	npages = uvmexp.npages;
	uvmpd_tune();
	uvm_unlock_pageq();

	for (;;) {
		long size;

		uvm_lock_fpageq();
		if (!uvm_nowait_failed && TAILQ_EMPTY(&uvm.pmr_control.allocs)) {
			msleep_nsec(&uvm.pagedaemon, &uvm.fpageqlock, PVM,
			    "pgdaemon", INFSLP);
			uvmexp.pdwoke++;
		}

		if ((pma = TAILQ_FIRST(&uvm.pmr_control.allocs)) != NULL) {
			pma->pm_flags |= UVM_PMA_BUSY;
			constraint = pma->pm_constraint;
		} else {
			if (uvm_nowait_failed) {
				/*
				 * XXX realistically, this is what our
				 * nowait callers probably care about
				 */
				constraint = dma_constraint;
				uvm_nowait_failed = 0;
			} else
				constraint = no_constraint;
		}

		uvm_unlock_fpageq();

		/*
		 * now lock page queues and recompute inactive count
		 */
		uvm_lock_pageq();
		if (npages != uvmexp.npages) {	/* check for new pages? */
			npages = uvmexp.npages;
			uvmpd_tune();
		}

		uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
		if (uvmexp.inactarg <= uvmexp.freetarg) {
			uvmexp.inactarg = uvmexp.freetarg + 1;
		}

		/* Reclaim pages from the buffer cache if possible. */
		size = 0;
		if (pma != NULL)
			size += pma->pm_size >> PAGE_SHIFT;
		if (uvmexp.free - BUFPAGES_DEFICIT < uvmexp.freetarg)
			size += uvmexp.freetarg - (uvmexp.free -
			    BUFPAGES_DEFICIT);
		if (size == 0)
			size = 16; /* XXX */
		uvm_unlock_pageq();
		(void) bufbackoff(&constraint, size * 2);
#if NDRM > 0
		drmbackoff(size * 2);
#endif
		uvm_lock_pageq();

		/*
		 * scan if needed
		 */
		if (pma != NULL ||
		    ((uvmexp.free - BUFPAGES_DEFICIT) < uvmexp.freetarg) ||
		    ((uvmexp.inactive + BUFPAGES_INACT) < uvmexp.inactarg)) {
			uvmpd_scan(pma);
		}

		/*
		 * if there's any free memory to be had,
		 * wake up any waiters.
		 */
		uvm_lock_fpageq();
		if (uvmexp.free > uvmexp.reserve_kernel ||
		    uvmexp.paging == 0) {
			wakeup(&uvmexp.free);
		}

		if (pma != NULL) {
			/*
			 * XXX If UVM_PMA_FREED isn't set, no pages
			 * were freed.  Should we set UVM_PMA_FAIL in
			 * that case?
			 */
			pma->pm_flags &= ~UVM_PMA_BUSY;
			if (pma->pm_flags & UVM_PMA_FREED) {
				pma->pm_flags &= ~UVM_PMA_LINKED;
				TAILQ_REMOVE(&uvm.pmr_control.allocs, pma,
				    pmq);
				wakeup(pma);
			}
		}
		uvm_unlock_fpageq();

		/*
		 * scan done.  unlock page queues (the only lock we are holding)
		 */
		uvm_unlock_pageq();

		sched_pause(yield);
	}
	/*NOTREACHED*/
}
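
/*
 * Illustrative sketch (not part of this file) of the waiting side of the
 * pma handshake driven by the loop above: a constrained allocator links a
 * uvm_pmalloc onto uvm.pmr_control.allocs, wakes the daemon, and sleeps
 * until the daemon clears UVM_PMA_LINKED and does wakeup(pma).  This is a
 * simplified approximation of what the pmemrange allocator does; the
 * function name is hypothetical and failure handling is omitted.
 */
#if 0	/* example only, not built */
void
uvm_pma_wait_example(struct uvm_pmalloc *pma)
{
	uvm_lock_fpageq();
	pma->pm_flags |= UVM_PMA_LINKED;
	TAILQ_INSERT_TAIL(&uvm.pmr_control.allocs, pma, pmq);
	wakeup(&uvm.pagedaemon);
	while (pma->pm_flags & (UVM_PMA_LINKED | UVM_PMA_BUSY))
		msleep_nsec(pma, &uvm.fpageqlock, PVM, "pmawait", INFSLP);
	uvm_unlock_fpageq();
}
#endif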

/*
 * uvm_aiodone_daemon:  main loop for the aiodone daemon.
 */
void
uvm_aiodone_daemon(void *arg)
{
	int s, free;
	struct buf *bp, *nbp;

	uvm.aiodoned_proc = curproc;

	for (;;) {
		/*
		 * Check for done aio structures. If we've got structures to
		 * process, do so. Otherwise sleep while avoiding races.
		 */
		mtx_enter(&uvm.aiodoned_lock);
		while ((bp = TAILQ_FIRST(&uvm.aio_done)) == NULL)
			msleep_nsec(&uvm.aiodoned, &uvm.aiodoned_lock,
			    PVM, "aiodoned", INFSLP);
		/* Take the list for ourselves. */
		TAILQ_INIT(&uvm.aio_done);
		mtx_leave(&uvm.aiodoned_lock);

		/* process each i/o that's done. */
		free = uvmexp.free;
		while (bp != NULL) {
			if (bp->b_flags & B_PDAEMON) {
				uvmexp.paging -= bp->b_bufsize >> PAGE_SHIFT;
			}
			nbp = TAILQ_NEXT(bp, b_freelist);
			s = splbio();	/* b_iodone must be called at splbio */
			(*bp->b_iodone)(bp);
			splx(s);
			bp = nbp;

			sched_pause(yield);
		}
		uvm_lock_fpageq();
		wakeup(free <= uvmexp.reserve_kernel ? &uvm.pagedaemon :
		    &uvmexp.free);
		uvm_unlock_fpageq();
	}
}
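
/*
 * Illustrative sketch (not part of this file): the producer that feeds
 * the loop above queues a finished async-I/O buffer on uvm.aio_done
 * under uvm.aiodoned_lock and wakes the daemon.  A simplified,
 * hypothetical version:
 */
#if 0	/* example only, not built */
void
uvm_aio_done_example(struct buf *bp)
{
	mtx_enter(&uvm.aiodoned_lock);
	TAILQ_INSERT_TAIL(&uvm.aio_done, bp, b_freelist);
	wakeup(&uvm.aiodoned);
	mtx_leave(&uvm.aiodoned_lock);
}
#endif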

/*
 * uvmpd_trylockowner: trylock the page's owner.
 *
 * => return the locked rwlock on success.  otherwise, return NULL.
 */
struct rwlock *
uvmpd_trylockowner(struct vm_page *pg)
{
	struct uvm_object *uobj = pg->uobject;
	struct rwlock *slock;

	if (uobj != NULL) {
		slock = uobj->vmobjlock;
	} else {
		struct vm_anon *anon = pg->uanon;

		KASSERT(anon != NULL);
		slock = anon->an_lock;
	}

	if (rw_enter(slock, RW_WRITE|RW_NOSLEEP)) {
		return NULL;
	}

	return slock;
}


/*
 * uvmpd_dropswap: free any swap allocated to this page.
 *
 * => called with owner locked.
 */
void
uvmpd_dropswap(struct vm_page *pg)
{
	struct vm_anon *anon = pg->uanon;

	if ((pg->pg_flags & PQ_ANON) && anon->an_swslot) {
		uvm_swap_free(anon->an_swslot, 1);
		anon->an_swslot = 0;
	} else if (pg->pg_flags & PQ_AOBJ) {
		uao_dropswap(pg->uobject, pg->offset >> PAGE_SHIFT);
	}
}

/*
 * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
 *
 * => called with page queues locked
 * => we work on meeting our free target by converting inactive pages
 *    into free pages.
 * => we handle the building of swap-backed clusters
 * => we stop scanning early once we have met our free target
 */
void
uvmpd_scan_inactive(struct uvm_pmalloc *pma, struct pglist *pglst)
{
	int free, result;
	struct vm_page *p, *nextpg;
	struct uvm_object *uobj;
	struct vm_page *pps[SWCLUSTPAGES], **ppsp;
	int npages;
	struct vm_page *swpps[SWCLUSTPAGES];	/* XXX: see below */
	struct rwlock *slock;
	int swnpages, swcpages;			/* XXX: see below */
	int swslot;
	struct vm_anon *anon;
	boolean_t swap_backed;
	vaddr_t start;
	int dirtyreacts;

	/*
	 * swslot is non-zero if we are building a swap cluster.  we want
	 * to stay in the loop while we have a page to scan or we have
	 * a swap-cluster to build.
	 */
	swslot = 0;
	swnpages = swcpages = 0;
	dirtyreacts = 0;
	p = NULL;

	/* Start with the first page on the list that fits in pma's range */
	if (pma != NULL) {
		paddr_t paddr;

		TAILQ_FOREACH(p, pglst, pageq) {
			paddr = atop(VM_PAGE_TO_PHYS(p));
			if (paddr >= pma->pm_constraint.ucr_low &&
			    paddr < pma->pm_constraint.ucr_high)
				break;
		}
	}

	if (p == NULL) {
		p = TAILQ_FIRST(pglst);
		pma = NULL;
	}

	for (; p != NULL || swslot != 0; p = nextpg) {
		/*
		 * note that p can be NULL only if we have traversed the whole
		 * list and need to do one final swap-backed clustered pageout.
		 */
		uobj = NULL;
		anon = NULL;
		if (p) {
			/*
			 * see if we've met our target
			 */
			free = uvmexp.free - BUFPAGES_DEFICIT;
			if (((pma == NULL || (pma->pm_flags & UVM_PMA_FREED)) &&
			    (free + uvmexp.paging >= uvmexp.freetarg << 2)) ||
			    dirtyreacts == UVMPD_NUMDIRTYREACTS) {
				if (swslot == 0) {
					/* exit now if no swap-i/o pending */
					break;
				}

				/* set p to null to signal final swap i/o */
				p = NULL;
				nextpg = NULL;
			}
		}
		if (p) {	/* if (we have a new page to consider) */
			/*
			 * we are below target and have a new page to consider.
			 */
			uvmexp.pdscans++;
			nextpg = TAILQ_NEXT(p, pageq);

			anon = p->uanon;
			uobj = p->uobject;

			/*
			 * first we attempt to lock the object that this page
			 * belongs to.  if our attempt fails we skip on to
			 * the next page (no harm done).  it is important to
			 * "try" locking the object as we are locking in the
			 * wrong order (pageq -> object) and we don't want to
			 * deadlock.
			 */
			slock = uvmpd_trylockowner(p);
			if (slock == NULL) {
				continue;
			}

			/*
			 * move referenced pages back to active queue
			 * and skip to next page.
			 */
			if (pmap_is_referenced(p)) {
				uvm_pageactivate(p);
				rw_exit(slock);
				uvmexp.pdreact++;
				continue;
			}

			if (p->pg_flags & PG_BUSY) {
				rw_exit(slock);
				uvmexp.pdbusy++;
				continue;
			}

			/* does the page belong to an object? */
			if (uobj != NULL) {
				uvmexp.pdobscan++;
			} else {
				KASSERT(anon != NULL);
				uvmexp.pdanscan++;
			}

			/*
			 * we now have the page queues locked.
			 * the page is not busy.  if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
				uvmexp.pdfreed++;

				if (anon) {

					/*
					 * an anonymous page can only be clean
					 * if it has backing store assigned.
					 */

					KASSERT(anon->an_swslot != 0);

					/* remove from object */
					anon->an_page = NULL;
				}
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, skip it if we'll have met our
			 * free target when all the current pageouts complete.
			 */
			if ((pma == NULL || (pma->pm_flags & UVM_PMA_FREED)) &&
			    (free + uvmexp.paging > uvmexp.freetarg << 2)) {
				rw_exit(slock);
				continue;
			}

			/*
			 * this page is dirty, but we can't page it out:
			 * swap is full and every slot is taken by a page
			 * that lives only in swap, so none can be freed.
			 * reactivate this page so that we eventually cycle
			 * all pages thru the inactive queue.
			 */
			if ((p->pg_flags & PQ_SWAPBACKED) && uvm_swapisfull()) {
				dirtyreacts++;
				uvm_pageactivate(p);
				rw_exit(slock);
				continue;
			}

			/*
			 * if the page is swap-backed and dirty and swap space
			 * is full, free any swap allocated to the page
			 * so that other pages can be paged out.
			 */
			KASSERT(uvmexp.swpginuse <= uvmexp.swpages);
			if ((p->pg_flags & PQ_SWAPBACKED) &&
			    uvmexp.swpginuse == uvmexp.swpages) {
				uvmpd_dropswap(p);
			}

			/*
			 * the page we are looking at is dirty.  we must
			 * clean it before it can be freed.  to do this we
			 * first mark the page busy so that no one else will
			 * touch the page.  we write protect all the mappings
			 * of the page so that no one touches it while it is
			 * in I/O.
			 */

			swap_backed = ((p->pg_flags & PQ_SWAPBACKED) != 0);
			atomic_setbits_int(&p->pg_flags, PG_BUSY);
			UVM_PAGE_OWN(p, "scan_inactive");
			pmap_page_protect(p, PROT_READ);
			uvmexp.pgswapout++;

			/*
			 * for swap-backed pages we need to (re)allocate
			 * swap space.
			 */
			if (swap_backed) {
				/* free old swap slot (if any) */
				uvmpd_dropswap(p);

				/* start new cluster (if necessary) */
				if (swslot == 0) {
					swnpages = SWCLUSTPAGES;
					swslot = uvm_swap_alloc(&swnpages,
					    TRUE);
					if (swslot == 0) {
						/* no swap?  give up! */
						atomic_clearbits_int(
						    &p->pg_flags,
						    PG_BUSY);
						UVM_PAGE_OWN(p, NULL);
						rw_exit(slock);
						continue;
					}
					swcpages = 0;	/* cluster is empty */
				}

				/* add block to cluster */
				swpps[swcpages] = p;
				if (anon)
					anon->an_swslot = swslot + swcpages;
				else
					uao_set_swslot(uobj,
					    p->offset >> PAGE_SHIFT,
					    swslot + swcpages);
				swcpages++;
			}
		} else {
			/* if p == NULL we must be doing a last swap i/o */
			swap_backed = TRUE;
		}

		/*
		 * now consider doing the pageout.
		 *
		 * for swap-backed pages, we do the pageout if we have either
		 * filled the cluster (in which case swnpages == swcpages) or
		 * run out of pages (p == NULL).
		 *
		 * for object pages, we always do the pageout.
		 */
		if (swap_backed) {
			if (p) {	/* if we just added a page to cluster */
				rw_exit(slock);

				/* cluster not full yet? */
				if (swcpages < swnpages)
					continue;
			}

			/* starting I/O now... set up for it */
			npages = swcpages;
			ppsp = swpps;
			/* for swap-backed pages only */
			start = (vaddr_t) swslot;

			/* if this is final pageout we could have a few
			 * extra swap blocks */
			if (swcpages < swnpages) {
				uvm_swap_free(swslot + swcpages,
				    (swnpages - swcpages));
			}
		} else {
			/* normal object pageout */
			ppsp = pps;
			npages = sizeof(pps) / sizeof(struct vm_page *);
			/* not looked at because PGO_ALLPAGES is set */
			start = 0;
		}

		/*
		 * now do the pageout.
		 *
		 * for swap_backed pages we have already built the cluster.
		 * for !swap_backed pages, uvm_pager_put will call the object's
		 * "make put cluster" function to build a cluster on our behalf.
		 *
		 * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
		 * it to free the cluster pages for us on a successful I/O (it
		 * always does this for un-successful I/O requests).  this
		 * allows us to do clustered pageout without having to deal
		 * with cluster pages at this level.
		 *
		 * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
		 *  IN: locked: page queues
		 * OUT: locked:
		 *     !locked: pageqs
		 */

		uvmexp.pdpageouts++;
		result = uvm_pager_put(swap_backed ? NULL : uobj, p,
		    &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);

		/*
		 * if we did i/o to swap, zero swslot to indicate that we are
		 * no longer building a swap-backed cluster.
		 */

		if (swap_backed)
			swslot = 0;		/* done with this cluster */

		/*
		 * first, we check for VM_PAGER_PEND which means that the
		 * async I/O is in progress and the async I/O done routine
		 * will clean up after us.  in this case we move on to the
		 * next page.
		 *
		 * there is a very remote chance that the pending async i/o can
		 * finish _before_ we get here.  if that happens, our page "p"
		 * may no longer be on the inactive queue.  so we verify this
		 * when determining the next page (starting over at the head if
		 * we've lost our inactive page).
		 */

		if (result == VM_PAGER_PEND) {
			uvmexp.paging += npages;
			uvm_lock_pageq();
			uvmexp.pdpending++;
			if (p) {
				if (p->pg_flags & PQ_INACTIVE)
					nextpg = TAILQ_NEXT(p, pageq);
				else
					nextpg = TAILQ_FIRST(pglst);
			} else {
				nextpg = NULL;
			}
			continue;
		}

		/* clean up "p" if we have one */
		if (p) {
			/*
			 * the I/O request to "p" is done and uvm_pager_put
			 * has freed any cluster pages it may have allocated
			 * during I/O.  all that is left for us to do is
			 * clean up page "p" (which is still PG_BUSY).
			 *
			 * our result could be one of the following:
			 *   VM_PAGER_OK: successful pageout
			 *
			 *   VM_PAGER_AGAIN: tmp resource shortage, we skip
			 *     to next page
			 *   VM_PAGER_{FAIL,ERROR,BAD}: an error.  we
			 *     "reactivate" page to get it out of the way (it
			 *     will eventually drift back into the inactive
			 *     queue for a retry).
			 *   VM_PAGER_UNLOCK: should never see this as it is
			 *     only valid for "get" operations
			 */

			/* relock p's object: page queues not locked yet, so
			 * no need for "try" */

			/* !swap_backed case: already locked... */
			if (swap_backed) {
				rw_enter(slock, RW_WRITE);
			}

#ifdef DIAGNOSTIC
			if (result == VM_PAGER_UNLOCK)
				panic("pagedaemon: pageout returned "
				    "invalid 'unlock' code");
#endif

			/* handle PG_WANTED now */
			if (p->pg_flags & PG_WANTED)
				wakeup(p);

			atomic_clearbits_int(&p->pg_flags, PG_BUSY|PG_WANTED);
			UVM_PAGE_OWN(p, NULL);

			/* released during I/O? Can only happen for anons */
			if (p->pg_flags & PG_RELEASED) {
				KASSERT(anon != NULL);
				/*
				 * remove page so we can get nextpg,
				 * also zero out anon so we don't use
				 * it after the free.
				 */
				anon->an_page = NULL;
				p->uanon = NULL;

				rw_exit(anon->an_lock);
				uvm_anfree(anon);	/* kills anon */
				pmap_page_protect(p, PROT_NONE);
				anon = NULL;
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				/* free released page */
				uvm_pagefree(p);
			} else {	/* page was not released during I/O */
				uvm_lock_pageq();
				nextpg = TAILQ_NEXT(p, pageq);
				if (result != VM_PAGER_OK) {
					/* pageout was a failure... */
					if (result != VM_PAGER_AGAIN)
						uvm_pageactivate(p);
					pmap_clear_reference(p);
					/* XXXCDC: if (swap_backed) FREE p's
					 * swap block? */
				} else {
					/* pageout was a success... */
					pmap_clear_reference(p);
					pmap_clear_modify(p);
					atomic_setbits_int(&p->pg_flags,
					    PG_CLEAN);
				}
			}

			/*
			 * drop object lock (if there is an object left).  do
			 * a safety check of nextpg to make sure it is on the
			 * inactive queue (it should be since PG_BUSY pages on
			 * the inactive queue can't be re-queued [note: not
			 * true for active queue]).
			 */
			rw_exit(slock);

			if (nextpg && (nextpg->pg_flags & PQ_INACTIVE) == 0) {
				nextpg = TAILQ_FIRST(pglst);	/* reload! */
			}
		} else {
			/*
			 * if p is null in this loop, make sure it stays null
			 * in the next loop.
			 */
			nextpg = NULL;

			/*
			 * lock page queues here just so they're always locked
			 * at the end of the loop.
			 */
			uvm_lock_pageq();
		}
	}
}
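
/*
 * Distilled sketch (illustrative only) of the swap-cluster lifecycle
 * implemented above: allocate up to SWCLUSTPAGES slots once, hand out
 * consecutive slots as pages are gathered, return the unused tail, then
 * pass the cluster to the pager.
 */
#if 0	/* example only, not built */
	int swnpages, swcpages, swslot;

	swnpages = SWCLUSTPAGES;
	swslot = uvm_swap_alloc(&swnpages, TRUE); /* may shrink swnpages */
	swcpages = 0;
	/* ... each gathered page is assigned slot swslot + swcpages++ ... */
	if (swcpages < swnpages)	/* give the unused tail back */
		uvm_swap_free(swslot + swcpages, swnpages - swcpages);
	/* ... uvm_pager_put(..., (vaddr_t)swslot, 0); then swslot = 0 ... */
#endif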

/*
 * uvmpd_scan: scan the page queues and attempt to meet our targets.
 *
 * => called with pageq's locked
 */

void
uvmpd_scan(struct uvm_pmalloc *pma)
{
	int free, inactive_shortage, swap_shortage, pages_freed;
	struct vm_page *p, *nextpg;
	struct rwlock *slock;

	MUTEX_ASSERT_LOCKED(&uvm.pageqlock);

	uvmexp.pdrevs++;		/* counter */

	/*
	 * get current "free" page count
	 */
	free = uvmexp.free - BUFPAGES_DEFICIT;

#ifdef __HAVE_PMAP_COLLECT
	/*
	 * swap out some processes if we are below our free target.
	 * we need to unlock the page queues for this.
	 */
	if (free < uvmexp.freetarg) {
		uvmexp.pdswout++;
		uvm_unlock_pageq();
		uvm_swapout_threads();
		uvm_lock_pageq();
	}
#endif

	/*
	 * now we want to work on meeting our targets.  first we work on our
	 * free target by converting inactive pages into free pages.  then
	 * we work on meeting our inactive target by converting active pages
	 * to inactive ones.
	 */

	pages_freed = uvmexp.pdfreed;
	(void) uvmpd_scan_inactive(pma, &uvm.page_inactive);
	pages_freed = uvmexp.pdfreed - pages_freed;

	/*
	 * we have done the scan to get free pages.  now we work on meeting
	 * our inactive target.
	 */
	inactive_shortage = uvmexp.inactarg - uvmexp.inactive - BUFPAGES_INACT;

	/*
	 * detect if we're not going to be able to page anything out
	 * until we free some swap resources from active pages.
	 */
	free = uvmexp.free - BUFPAGES_DEFICIT;
	swap_shortage = 0;
	if (free < uvmexp.freetarg &&
	    uvmexp.swpginuse == uvmexp.swpages &&
	    !uvm_swapisfull() &&
	    pages_freed == 0) {
		swap_shortage = uvmexp.freetarg - free;
	}

	for (p = TAILQ_FIRST(&uvm.page_active);
	    p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
	    p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);
		if (p->pg_flags & PG_BUSY) {
			continue;
		}

		/*
		 * lock the page's owner.
		 */
		slock = uvmpd_trylockowner(p);
		if (slock == NULL) {
			continue;
		}

		/*
		 * skip this page if it's busy.
		 */
		if ((p->pg_flags & PG_BUSY) != 0) {
			rw_exit(slock);
			continue;
		}

		/*
		 * if there's a shortage of swap, free any swap allocated
		 * to this page so that other pages can be paged out.
		 */
		if (swap_shortage > 0) {
			if ((p->pg_flags & PQ_ANON) && p->uanon->an_swslot) {
				uvm_swap_free(p->uanon->an_swslot, 1);
				p->uanon->an_swslot = 0;
				atomic_clearbits_int(&p->pg_flags, PG_CLEAN);
				swap_shortage--;
			}
			if (p->pg_flags & PQ_AOBJ) {
				int slot = uao_set_swslot(p->uobject,
				    p->offset >> PAGE_SHIFT, 0);
				if (slot) {
					uvm_swap_free(slot, 1);
					atomic_clearbits_int(&p->pg_flags,
					    PG_CLEAN);
					swap_shortage--;
				}
			}
		}

		/*
		 * deactivate this page if there's a shortage of
		 * inactive pages.
		 */
		if (inactive_shortage > 0) {
			pmap_page_protect(p, PROT_NONE);
			/* no need to check wire_count as pg is "active" */
			uvm_pagedeactivate(p);
			uvmexp.pddeact++;
			inactive_shortage--;
		}

		/*
		 * we're done with this page.
		 */
		rw_exit(slock);
	}
}
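
/*
 * Worked example of the shortage math above (illustrative numbers,
 * reusing the hypothetical freetarg = 11650 from the tuning example):
 * with free = 11000, no pages freed by the inactive scan, every swap
 * slot in use, and swap not yet full of swap-only pages,
 * swap_shortage = 11650 - 11000 = 650, so the active-queue loop strips
 * swap slots from up to 650 pages; independently, up to
 * inactive_shortage = inactarg - inactive - BUFPAGES_INACT pages are
 * deactivated.
 */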

#ifdef HIBERNATE

/*
 * uvmpd_drop: drop clean pages from list
 */
void
uvmpd_drop(struct pglist *pglst)
{
	struct vm_page *p, *nextpg;

	for (p = TAILQ_FIRST(pglst); p != NULL; p = nextpg) {
		nextpg = TAILQ_NEXT(p, pageq);

		if (p->pg_flags & PQ_ANON || p->uobject == NULL)
			continue;

		if (p->pg_flags & PG_BUSY)
			continue;

		if (p->pg_flags & PG_CLEAN) {
			struct uvm_object *uobj = p->uobject;

			rw_enter(uobj->vmobjlock, RW_WRITE);
			uvm_lock_pageq();
			/*
			 * we now have the page queues locked.
			 * the page is not busy.  if the page is clean we
			 * can free it now and continue.
			 */
			if (p->pg_flags & PG_CLEAN) {
				if (p->pg_flags & PQ_SWAPBACKED) {
					/* this page now lives only in swap */
					atomic_inc_int(&uvmexp.swpgonly);
				}

				/* zap all mappings with pmap_page_protect... */
				pmap_page_protect(p, PROT_NONE);
				uvm_pagefree(p);
			}
			uvm_unlock_pageq();
			rw_exit(uobj->vmobjlock);
		}
	}
}

void
uvmpd_hibernate(void)
{
	uvmpd_drop(&uvm.page_inactive);
	uvmpd_drop(&uvm.page_active);
}

#endif