FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_pdpolicy_clockpro.c


    1 /*      $NetBSD: uvm_pdpolicy_clockpro.c,v 1.27 2022/04/12 20:27:56 andvar Exp $        */
    2 
    3 /*-
    4  * Copyright (c)2005, 2006 YAMAMOTO Takashi,
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 /*
   30  * CLOCK-Pro replacement policy:
   31  *      http://web.cse.ohio-state.edu/hpcs/WWW/HTML/publications/abs05-3.html
   32  *
   33  * approximation of the list of non-resident pages using hash:
   34  *      http://linux-mm.org/ClockProApproximation
   35  */
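
       /*
        * rough overview of this implementation:
        *
        * resident pages live on four queues (CLOCKPRO_NEWQ/COLDQ/HOTQ/LISTQ
        * below): a small "new" queue on which the initial reference is
        * cleared, the cold and hot queues corresponding to the two clock
        * hands of the paper, and an auxiliary list queue used in the LISTQ
        * configuration.  a page's hot/cold status and test period are
        * tracked with the PQ_HOT and PQ_TEST flags.
        *
        * non-resident pages are not tracked exactly; a small hash of
        * buckets (struct bucket below) remembers a 32-bit cookie per
        * recently evicted page, which is enough to notice when a page is
        * faulted back in during its test period.
        */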
   36 
   37 /* #define      CLOCKPRO_DEBUG */
   38 
   39 #if defined(PDSIM)
   40 
   41 #include "pdsim.h"
   42 
   43 #else /* defined(PDSIM) */
   44 
   45 #include <sys/cdefs.h>
   46 __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.27 2022/04/12 20:27:56 andvar Exp $");
   47 
   48 #include "opt_ddb.h"
   49 
   50 #include <sys/param.h>
   51 #include <sys/proc.h>
   52 #include <sys/systm.h>
   53 #include <sys/kernel.h>
   54 #include <sys/hash.h>
   55 
   56 #include <uvm/uvm.h>
   57 #include <uvm/uvm_pdaemon.h>    /* for uvmpd_trylockowner */
   58 #include <uvm/uvm_pdpolicy.h>
   59 #include <uvm/uvm_pdpolicy_impl.h>
   60 
   61 #if ((__STDC_VERSION__ - 0) >= 199901L)
   62 #define DPRINTF(...)    /* nothing */
   63 #define WARN(...)       printf(__VA_ARGS__)
   64 #else /* ((__STDC_VERSION__ - 0) >= 199901L) */
   65 #define DPRINTF(a...)   /* nothing */   /* GCC */
   66 #define WARN(a...)      printf(a)
   67 #endif /* ((__STDC_VERSION__ - 0) >= 199901L) */
   68 
   69 #define dump(a)         /* nothing */
   70 
   71 #undef  USEONCE2
   72 #define LISTQ
   73 #undef  ADAPTIVE
   74 
   75 #endif /* defined(PDSIM) */
   76 
   77 #if !defined(CLOCKPRO_COLDPCT)
   78 #define CLOCKPRO_COLDPCT        10
   79 #endif /* !defined(CLOCKPRO_COLDPCT) */
   80 
   81 #define CLOCKPRO_COLDPCTMAX     90
   82 
   83 #if !defined(CLOCKPRO_HASHFACTOR)
   84 #define CLOCKPRO_HASHFACTOR     2
   85 #endif /* !defined(CLOCKPRO_HASHFACTOR) */
   86 
   87 #define CLOCKPRO_NEWQMIN        ((1024 * 1024) >> PAGE_SHIFT)   /* XXX */
   88 
   89 int clockpro_hashfactor = CLOCKPRO_HASHFACTOR;
   90 
   91 PDPOL_EVCNT_DEFINE(nresrecordobj)
   92 PDPOL_EVCNT_DEFINE(nresrecordanon)
   93 PDPOL_EVCNT_DEFINE(nreslookupobj)
   94 PDPOL_EVCNT_DEFINE(nreslookupanon)
   95 PDPOL_EVCNT_DEFINE(nresfoundobj)
   96 PDPOL_EVCNT_DEFINE(nresfoundanon)
   97 PDPOL_EVCNT_DEFINE(nresanonfree)
   98 PDPOL_EVCNT_DEFINE(nresconflict)
   99 PDPOL_EVCNT_DEFINE(nresoverwritten)
  100 PDPOL_EVCNT_DEFINE(nreshandhot)
  101 
  102 PDPOL_EVCNT_DEFINE(hhottakeover)
  103 PDPOL_EVCNT_DEFINE(hhotref)
  104 PDPOL_EVCNT_DEFINE(hhotunref)
  105 PDPOL_EVCNT_DEFINE(hhotcold)
  106 PDPOL_EVCNT_DEFINE(hhotcoldtest)
  107 
  108 PDPOL_EVCNT_DEFINE(hcoldtakeover)
  109 PDPOL_EVCNT_DEFINE(hcoldref)
  110 PDPOL_EVCNT_DEFINE(hcoldunref)
  111 PDPOL_EVCNT_DEFINE(hcoldreftest)
  112 PDPOL_EVCNT_DEFINE(hcoldunreftest)
  113 PDPOL_EVCNT_DEFINE(hcoldunreftestspeculative)
  114 PDPOL_EVCNT_DEFINE(hcoldhot)
  115 
  116 PDPOL_EVCNT_DEFINE(speculativeenqueue)
  117 PDPOL_EVCNT_DEFINE(speculativehit1)
  118 PDPOL_EVCNT_DEFINE(speculativehit2)
  119 PDPOL_EVCNT_DEFINE(speculativemiss)
  120 
  121 PDPOL_EVCNT_DEFINE(locksuccess)
  122 PDPOL_EVCNT_DEFINE(lockfail)
  123 
  124 #define PQ_REFERENCED   0x000000010
  125 #define PQ_HOT          0x000000020
  126 #define PQ_TEST         0x000000040
  127 #define PQ_INITIALREF   0x000000080
  128 #define PQ_QMASK        0x000000700
  129 #define PQ_QFACTOR      0x000000100
  130 #define PQ_SPECULATIVE  0x000000800
  131 
  132 #define CLOCKPRO_NOQUEUE        0
  133 #define CLOCKPRO_NEWQ           1       /* small queue to clear initial ref. */
  134 #if defined(LISTQ)
  135 #define CLOCKPRO_COLDQ          2
  136 #define CLOCKPRO_HOTQ           3
  137 #else /* defined(LISTQ) */
  138 #define CLOCKPRO_COLDQ          (2 + coldqidx)  /* XXX */
  139 #define CLOCKPRO_HOTQ           (3 - coldqidx)  /* XXX */
  140 #endif /* defined(LISTQ) */
  141 #define CLOCKPRO_LISTQ          4
  142 #define CLOCKPRO_NQUEUE         4
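
       /*
        * a page's queue index is encoded in pg->pqflags via PQ_QMASK and
        * PQ_QFACTOR and maintained by clockpro_setq()/clockpro_getq()
        * below.  e.g. a page on the hot queue (CLOCKPRO_HOTQ == 3 in the
        * LISTQ configuration) has (pqflags & PQ_QMASK) == 0x300, and
        * clockpro_getq() recovers 3 by dividing by PQ_QFACTOR (0x100).
        */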
  143 
  144 static bool     uvmpdpol_pagerealize_locked(struct vm_page *);
  145 
  146 static inline void
  147 clockpro_setq(struct vm_page *pg, int qidx)
  148 {
  149         KASSERT(qidx >= CLOCKPRO_NOQUEUE);
  150         KASSERT(qidx <= CLOCKPRO_NQUEUE);
  151 
  152         pg->pqflags = (pg->pqflags & ~PQ_QMASK) | (qidx * PQ_QFACTOR);
  153 }
  154 
  155 static inline int
  156 clockpro_getq(struct vm_page *pg)
  157 {
  158         int qidx;
  159 
  160         qidx = (pg->pqflags & PQ_QMASK) / PQ_QFACTOR;
  161         KASSERT(qidx >= CLOCKPRO_NOQUEUE);
  162         KASSERT(qidx <= CLOCKPRO_NQUEUE);
  163         return qidx;
  164 }
  165 
  166 typedef struct {
  167         struct pglist q_q;
  168         int q_len;
  169 } pageq_t;
  170 
  171 struct clockpro_state {
  172         kmutex_t lock;
  173         int s_npages;
  174         int s_coldtarget;
  175         int s_ncold;
  176 
  177         int s_newqlenmax;
  178         pageq_t s_q[CLOCKPRO_NQUEUE];
  179 
  180         struct uvm_pctparam s_coldtargetpct;
  181 };
  182 
  183 static pageq_t *
  184 clockpro_queue(struct clockpro_state *s, int qidx)
  185 {
  186 
  187         KASSERT(CLOCKPRO_NOQUEUE < qidx);
  188         KASSERT(qidx <= CLOCKPRO_NQUEUE);
  189 
  190         return &s->s_q[qidx - 1];
  191 }
  192 
  193 #if !defined(LISTQ)
  194 
  195 static int coldqidx;
  196 
  197 static void
  198 clockpro_switchqueue(void)
  199 {
  200 
  201         coldqidx = 1 - coldqidx;
  202 }
  203 
  204 #endif /* !defined(LISTQ) */
  205 
  206 static struct clockpro_state clockpro __cacheline_aligned;
  207 static struct clockpro_scanstate {
  208         int ss_nscanned;
  209 } scanstate;
  210 
  211 /* ---------------------------------------- */
  212 
  213 static void
  214 pageq_init(pageq_t *q)
  215 {
  216 
  217         TAILQ_INIT(&q->q_q);
  218         q->q_len = 0;
  219 }
  220 
  221 static int
  222 pageq_len(const pageq_t *q)
  223 {
  224 
  225         return q->q_len;
  226 }
  227 
  228 static struct vm_page *
  229 pageq_first(const pageq_t *q)
  230 {
  231 
  232         return TAILQ_FIRST(&q->q_q);
  233 }
  234 
  235 static void
  236 pageq_insert_tail(pageq_t *q, struct vm_page *pg)
  237 {
  238 
  239         TAILQ_INSERT_TAIL(&q->q_q, pg, pdqueue);
  240         q->q_len++;
  241 }
  242 
  243 #if defined(LISTQ)
  244 static void
  245 pageq_insert_head(pageq_t *q, struct vm_page *pg)
  246 {
  247 
  248         TAILQ_INSERT_HEAD(&q->q_q, pg, pdqueue);
  249         q->q_len++;
  250 }
  251 #endif
  252 
  253 static void
  254 pageq_remove(pageq_t *q, struct vm_page *pg)
  255 {
  256 
  257 #if 1
  258         KASSERT(clockpro_queue(&clockpro, clockpro_getq(pg)) == q);
  259 #endif
  260         KASSERT(q->q_len > 0);
  261         TAILQ_REMOVE(&q->q_q, pg, pdqueue);
  262         q->q_len--;
  263 }
  264 
  265 static struct vm_page *
  266 pageq_remove_head(pageq_t *q)
  267 {
  268         struct vm_page *pg;
  269 
  270         pg = TAILQ_FIRST(&q->q_q);
  271         if (pg == NULL) {
  272                 KASSERT(q->q_len == 0);
  273                 return NULL;
  274         }
  275         pageq_remove(q, pg);
  276         return pg;
  277 }
  278 
  279 /* ---------------------------------------- */
  280 
  281 static void
  282 clockpro_insert_tail(struct clockpro_state *s, int qidx, struct vm_page *pg)
  283 {
  284         pageq_t *q = clockpro_queue(s, qidx);
  285         
  286         clockpro_setq(pg, qidx);
  287         pageq_insert_tail(q, pg);
  288 }
  289 
  290 #if defined(LISTQ)
  291 static void
  292 clockpro_insert_head(struct clockpro_state *s, int qidx, struct vm_page *pg)
  293 {
  294         pageq_t *q = clockpro_queue(s, qidx);
  295         
  296         clockpro_setq(pg, qidx);
  297         pageq_insert_head(q, pg);
  298 }
  299 
  300 #endif
  301 /* ---------------------------------------- */
  302 
  303 typedef uint32_t nonres_cookie_t;
  304 #define NONRES_COOKIE_INVAL     0
  305 
  306 typedef uintptr_t objid_t;
  307 
  308 /*
  309  * XXX maybe these hash functions need reconsideration,
  310  * given that hash distribution is critical here.
  311  */
  312 
  313 static uint32_t
  314 pageidentityhash1(objid_t obj, off_t idx)
  315 {
  316         uint32_t hash = HASH32_BUF_INIT;
  317 
  318 #if 1
  319         hash = hash32_buf(&idx, sizeof(idx), hash);
  320         hash = hash32_buf(&obj, sizeof(obj), hash);
  321 #else
  322         hash = hash32_buf(&obj, sizeof(obj), hash);
  323         hash = hash32_buf(&idx, sizeof(idx), hash);
  324 #endif
  325         return hash;
  326 }
  327 
  328 static uint32_t
  329 pageidentityhash2(objid_t obj, off_t idx)
  330 {
  331         uint32_t hash = HASH32_BUF_INIT;
  332 
  333         hash = hash32_buf(&obj, sizeof(obj), hash);
  334         hash = hash32_buf(&idx, sizeof(idx), hash);
  335         return hash;
  336 }
  337 
  338 static nonres_cookie_t
  339 calccookie(objid_t obj, off_t idx)
  340 {
  341         uint32_t hash = pageidentityhash2(obj, idx);
  342         nonres_cookie_t cookie = hash;
  343 
  344         if (__predict_false(cookie == NONRES_COOKIE_INVAL)) {
  345                 cookie++; /* XXX */
  346         }
  347         return cookie;
  348 }
  349 
  350 #define BUCKETSIZE      14
  351 struct bucket {
  352         int cycle;
  353         int cur;
  354         nonres_cookie_t pages[BUCKETSIZE];
  355 };
  356 static int cycle_target;
  357 static int cycle_target_frac;
  358 
  359 static struct bucket static_bucket;
  360 static struct bucket *buckets = &static_bucket;
  361 static size_t hashsize = 1;
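
       /*
        * each bucket is a tiny clock of its own: b->cur is the next slot
        * to overwrite and b->cycle remembers the value of cycle_target at
        * the last rotation.  cycle_target is advanced by hand-hot
        * (handhot_advance below) by roughly BUCKETSIZE per full sweep of
        * the hot queue, so nonresident_rotate() invalidates cookies whose
        * test period the hand has already passed.
        */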
  362 
  363 static int coldadj;
  364 #define COLDTARGET_ADJ(d)       coldadj += (d)
  365 
  366 #if defined(PDSIM)
  367 
  368 static void *
  369 clockpro_hashalloc(int n)
  370 {
  371         size_t allocsz = sizeof(*buckets) * n;
  372 
  373         return malloc(allocsz);
  374 }
  375 
  376 static void
  377 clockpro_hashfree(void *p, int n)
  378 {
  379 
  380         free(p);
  381 }
  382 
  383 #else /* defined(PDSIM) */
  384 
  385 static void *
  386 clockpro_hashalloc(int n)
  387 {
  388         size_t allocsz = round_page(sizeof(*buckets) * n);
  389 
  390         return (void *)uvm_km_alloc(kernel_map, allocsz, 0, UVM_KMF_WIRED);
  391 }
  392 
  393 static void
  394 clockpro_hashfree(void *p, int n)
  395 {
  396         size_t allocsz = round_page(sizeof(*buckets) * n);
  397 
  398         uvm_km_free(kernel_map, (vaddr_t)p, allocsz, UVM_KMF_WIRED);
  399 }
  400 
  401 #endif /* defined(PDSIM) */
  402 
  403 static void
  404 clockpro_hashinit(uint64_t n)
  405 {
  406         struct bucket *newbuckets;
  407         struct bucket *oldbuckets;
  408         size_t sz;
  409         size_t oldsz;
  410         int i;
  411 
  412         sz = howmany(n, BUCKETSIZE);
  413         sz *= clockpro_hashfactor;
  414         newbuckets = clockpro_hashalloc(sz);
  415         if (newbuckets == NULL) {
  416                 panic("%s: allocation failure", __func__);
  417         }
  418         for (i = 0; i < sz; i++) {
  419                 struct bucket *b = &newbuckets[i];
  420                 int j;
  421 
  422                 b->cycle = cycle_target;
  423                 b->cur = 0;
  424                 for (j = 0; j < BUCKETSIZE; j++) {
  425                         b->pages[j] = NONRES_COOKIE_INVAL;
  426                 }
  427         }
  428         /* XXX lock */
  429         oldbuckets = buckets;
  430         oldsz = hashsize;
  431         buckets = newbuckets;
  432         hashsize = sz;
  433         /* XXX unlock */
  434         if (oldbuckets != &static_bucket) {
  435                 clockpro_hashfree(oldbuckets, oldsz);
  436         }
  437 }
  438 
  439 static struct bucket *
  440 nonresident_getbucket(objid_t obj, off_t idx)
  441 {
  442         uint32_t hash;
  443 
  444         hash = pageidentityhash1(obj, idx);
  445         return &buckets[hash % hashsize];
  446 }
  447 
  448 static void
  449 nonresident_rotate(struct bucket *b)
  450 {
  451         const int target = cycle_target;
  452         const int cycle = b->cycle;
  453         int cur;
  454         int todo;
  455 
  456         todo = target - cycle;
  457         if (todo >= BUCKETSIZE * 2) {
  458                 todo = (todo % BUCKETSIZE) + BUCKETSIZE;
  459         }
  460         cur = b->cur;
  461         while (todo > 0) {
  462                 if (b->pages[cur] != NONRES_COOKIE_INVAL) {
  463                         PDPOL_EVCNT_INCR(nreshandhot);
  464                         COLDTARGET_ADJ(-1);
  465                 }
  466                 b->pages[cur] = NONRES_COOKIE_INVAL;
  467                 cur++;
  468                 if (cur == BUCKETSIZE) {
  469                         cur = 0;
  470                 }
  471                 todo--;
  472         }
  473         b->cycle = target;
  474         b->cur = cur;
  475 }
  476 
  477 static bool
  478 nonresident_lookupremove(objid_t obj, off_t idx)
  479 {
  480         struct bucket *b = nonresident_getbucket(obj, idx);
  481         nonres_cookie_t cookie = calccookie(obj, idx);
  482         int i;
  483 
  484         nonresident_rotate(b);
  485         for (i = 0; i < BUCKETSIZE; i++) {
  486                 if (b->pages[i] == cookie) {
  487                         b->pages[i] = NONRES_COOKIE_INVAL;
  488                         return true;
  489                 }
  490         }
  491         return false;
  492 }
  493 
  494 static objid_t
  495 pageobj(struct vm_page *pg)
  496 {
  497         const void *obj;
  498 
  499         /*
   500          * XXX object pointer is often freed and reused for an unrelated object.
  501          * for vnodes, it would be better to use something like
  502          * a hash of fsid/fileid/generation.
  503          */
  504 
  505         obj = pg->uobject;
  506         if (obj == NULL) {
  507                 obj = pg->uanon;
  508                 KASSERT(obj != NULL);
  509         }
  510         return (objid_t)obj;
  511 }
  512 
  513 static off_t
  514 pageidx(struct vm_page *pg)
  515 {
  516 
  517         KASSERT((pg->offset & PAGE_MASK) == 0);
  518         return pg->offset >> PAGE_SHIFT;
  519 }
  520 
  521 static bool
  522 nonresident_pagelookupremove(struct vm_page *pg)
  523 {
  524         bool found = nonresident_lookupremove(pageobj(pg), pageidx(pg));
  525 
  526         if (pg->uobject) {
  527                 PDPOL_EVCNT_INCR(nreslookupobj);
  528         } else {
  529                 PDPOL_EVCNT_INCR(nreslookupanon);
  530         }
  531         if (found) {
  532                 if (pg->uobject) {
  533                         PDPOL_EVCNT_INCR(nresfoundobj);
  534                 } else {
  535                         PDPOL_EVCNT_INCR(nresfoundanon);
  536                 }
  537         }
  538         return found;
  539 }
  540 
  541 static void
  542 nonresident_pagerecord(struct vm_page *pg)
  543 {
  544         objid_t obj = pageobj(pg);
  545         off_t idx = pageidx(pg);
  546         struct bucket *b = nonresident_getbucket(obj, idx);
  547         nonres_cookie_t cookie = calccookie(obj, idx);
  548 
  549 #if defined(DEBUG)
  550         int i;
  551 
  552         for (i = 0; i < BUCKETSIZE; i++) {
  553                 if (b->pages[i] == cookie) {
  554                         PDPOL_EVCNT_INCR(nresconflict);
  555                 }
  556         }
  557 #endif /* defined(DEBUG) */
  558 
  559         if (pg->uobject) {
  560                 PDPOL_EVCNT_INCR(nresrecordobj);
  561         } else {
  562                 PDPOL_EVCNT_INCR(nresrecordanon);
  563         }
  564         nonresident_rotate(b);
  565         if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
  566                 PDPOL_EVCNT_INCR(nresoverwritten);
  567                 COLDTARGET_ADJ(-1);
  568         }
  569         b->pages[b->cur] = cookie;
  570         b->cur = (b->cur + 1) % BUCKETSIZE;
  571 }
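
       /*
        * nonresident_pagerecord() and nonresident_pagelookupremove() are
        * the two halves of the reuse-distance test: hand-cold records a
        * cookie when it evicts a page that is still in its test period,
        * and clockpro_pageenqueue() later promotes the page to hot if it
        * is faulted back in before the cookie has been rotated away.
        */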
  572 
  573 /* ---------------------------------------- */
  574 
  575 #if defined(CLOCKPRO_DEBUG)
  576 static void
  577 check_sanity(void)
  578 {
  579 }
  580 #else /* defined(CLOCKPRO_DEBUG) */
  581 #define check_sanity()  /* nothing */
  582 #endif /* defined(CLOCKPRO_DEBUG) */
  583 
  584 static void
  585 clockpro_reinit(void)
  586 {
  587 
  588         KASSERT(mutex_owned(&clockpro.lock));
  589 
  590         clockpro_hashinit(uvmexp.npages);
  591 }
  592 
  593 static void
  594 clockpro_init(void)
  595 {
  596         struct clockpro_state *s = &clockpro;
  597         int i;
  598 
  599         mutex_init(&s->lock, MUTEX_DEFAULT, IPL_NONE);
  600         for (i = 0; i < CLOCKPRO_NQUEUE; i++) {
  601                 pageq_init(&s->s_q[i]);
  602         }
  603         s->s_newqlenmax = 1;
  604         s->s_coldtarget = 1;
  605         uvm_pctparam_init(&s->s_coldtargetpct, CLOCKPRO_COLDPCT, NULL);
  606 }
  607 
  608 static void
  609 clockpro_tune(void)
  610 {
  611         struct clockpro_state *s = &clockpro;
  612         int coldtarget;
  613 
  614         KASSERT(mutex_owned(&s->lock));
  615 
  616 #if defined(ADAPTIVE)
  617         int coldmax = s->s_npages * CLOCKPRO_COLDPCTMAX / 100;
  618         int coldmin = 1;
  619 
  620         coldtarget = s->s_coldtarget;
  621         if (coldtarget + coldadj < coldmin) {
  622                 coldadj = coldmin - coldtarget;
  623         } else if (coldtarget + coldadj > coldmax) {
  624                 coldadj = coldmax - coldtarget;
  625         }
  626         coldtarget += coldadj;
  627 #else /* defined(ADAPTIVE) */
  628         coldtarget = UVM_PCTPARAM_APPLY(&s->s_coldtargetpct, s->s_npages);
  629         if (coldtarget < 1) {
  630                 coldtarget = 1;
  631         }
  632 #endif /* defined(ADAPTIVE) */
  633 
  634         s->s_coldtarget = coldtarget;
  635         s->s_newqlenmax = coldtarget / 4;
  636         if (s->s_newqlenmax < CLOCKPRO_NEWQMIN) {
  637                 s->s_newqlenmax = CLOCKPRO_NEWQMIN;
  638         }
  639 }
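
       /*
        * for example, without ADAPTIVE and with the default
        * CLOCKPRO_COLDPCT of 10, a machine with 262144 managed pages
        * (1GB of 4KB pages) ends up with a coldtarget of roughly 26214
        * pages and a newqlenmax of roughly 6553; newqlenmax never drops
        * below CLOCKPRO_NEWQMIN, which with 4KB pages (PAGE_SHIFT == 12)
        * is 256 pages, i.e. 1MB.
        */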
  640 
  641 static void
  642 clockpro_movereferencebit(struct vm_page *pg, bool locked)
  643 {
  644         kmutex_t *lock;
  645         bool referenced;
  646 
  647         KASSERT(mutex_owned(&clockpro.lock));
  648         KASSERT(!locked || uvm_page_owner_locked_p(pg, false));
  649         if (!locked) {
  650                 /*
  651                  * acquire interlock to stabilize page identity.
  652                  * if we have caught the page in a state of flux
  653                  * and it should be dequeued, abort.  it will be
  654                  * dequeued later.
  655                  */
  656                 mutex_enter(&pg->interlock);
  657                 if ((pg->uobject == NULL && pg->uanon == NULL) ||
  658                     pg->wire_count > 0) {
  659                         mutex_exit(&pg->interlock);
  660                         PDPOL_EVCNT_INCR(lockfail);
  661                         return;
  662                 }
  663                 mutex_exit(&clockpro.lock);     /* XXX */
  664                 lock = uvmpd_trylockowner(pg);
  665                 /* pg->interlock now dropped */
  666                 mutex_enter(&clockpro.lock);    /* XXX */
  667                 if (lock == NULL) {
  668                         /*
  669                          * XXXuvmplock
  670                          */
  671                         PDPOL_EVCNT_INCR(lockfail);
  672                         return;
  673                 }
  674                 PDPOL_EVCNT_INCR(locksuccess);
  675         }
  676         referenced = pmap_clear_reference(pg);
  677         if (!locked) {
  678                 mutex_exit(lock);
  679         }
  680         if (referenced) {
  681                 pg->pqflags |= PQ_REFERENCED;
  682         }
  683 }
  684 
  685 static void
  686 clockpro_clearreferencebit(struct vm_page *pg, bool locked)
  687 {
  688 
  689         KASSERT(mutex_owned(&clockpro.lock));
  690 
  691         clockpro_movereferencebit(pg, locked);
  692         pg->pqflags &= ~PQ_REFERENCED;
  693 }
  694 
  695 static void
  696 clockpro___newqrotate(int len)
  697 {
  698         struct clockpro_state * const s = &clockpro;
  699         pageq_t * const newq = clockpro_queue(s, CLOCKPRO_NEWQ);
  700         struct vm_page *pg;
  701 
  702         KASSERT(mutex_owned(&s->lock));
  703 
  704         while (pageq_len(newq) > len) {
  705                 pg = pageq_remove_head(newq);
  706                 KASSERT(pg != NULL);
  707                 KASSERT(clockpro_getq(pg) == CLOCKPRO_NEWQ);
  708                 if ((pg->pqflags & PQ_INITIALREF) != 0) {
  709                         clockpro_clearreferencebit(pg, false);
  710                         pg->pqflags &= ~PQ_INITIALREF;
  711                 }
   712                 /* place at the list head (i.e. our queue tail; see below) */
  713                 clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
  714         }
  715 }
  716 
  717 static void
  718 clockpro_newqrotate(void)
  719 {
  720         struct clockpro_state * const s = &clockpro;
  721 
  722         KASSERT(mutex_owned(&s->lock));
  723 
  724         check_sanity();
  725         clockpro___newqrotate(s->s_newqlenmax);
  726         check_sanity();
  727 }
  728 
  729 static void
  730 clockpro_newqflush(int n)
  731 {
  732 
  733         KASSERT(mutex_owned(&clockpro.lock));
  734 
  735         check_sanity();
  736         clockpro___newqrotate(n);
  737         check_sanity();
  738 }
  739 
  740 static void
  741 clockpro_newqflushone(void)
  742 {
  743         struct clockpro_state * const s = &clockpro;
  744 
  745         KASSERT(mutex_owned(&s->lock));
  746 
  747         clockpro_newqflush(
  748             MAX(pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) - 1, 0));
  749 }
  750 
  751 /*
  752  * our "tail" is called "list-head" in the paper.
  753  */
  754 
  755 static void
  756 clockpro___enqueuetail(struct vm_page *pg)
  757 {
  758         struct clockpro_state * const s = &clockpro;
  759 
  760         KASSERT(mutex_owned(&s->lock));
  761         KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
  762 
  763         check_sanity();
  764 #if !defined(USEONCE2)
  765         clockpro_insert_tail(s, CLOCKPRO_NEWQ, pg);
  766         clockpro_newqrotate();
  767 #else /* !defined(USEONCE2) */
  768 #if defined(LISTQ)
  769         KASSERT((pg->pqflags & PQ_REFERENCED) == 0);
  770 #endif /* defined(LISTQ) */
  771         clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
  772 #endif /* !defined(USEONCE2) */
  773         check_sanity();
  774 }
  775 
  776 static void
  777 clockpro_pageenqueue(struct vm_page *pg)
  778 {
  779         struct clockpro_state * const s = &clockpro;
  780         bool hot;
  781         bool speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */
  782 
  783         KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0);
  784         KASSERT(mutex_owned(&s->lock));
  785         check_sanity();
  786         KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
  787         s->s_npages++;
  788         pg->pqflags &= ~(PQ_HOT|PQ_TEST);
  789         if (speculative) {
  790                 hot = false;
  791                 PDPOL_EVCNT_INCR(speculativeenqueue);
  792         } else {
  793                 hot = nonresident_pagelookupremove(pg);
  794                 if (hot) {
  795                         COLDTARGET_ADJ(1);
  796                 }
  797         }
  798 
  799         /*
  800          * consider mmap'ed file:
  801          *
  802          * - read-ahead enqueues a page.
  803          *
  804          * - on the following read-ahead hit, the fault handler activates it.
  805          *
  806          * - finally, the userland code which caused the above fault
   807          *   actually accesses the page, which sets its reference bit.
  808          *
  809          * we want to count the above as a single access, rather than
  810          * three accesses with short reuse distances.
  811          */
  812 
  813 #if defined(USEONCE2)
  814         pg->pqflags &= ~PQ_INITIALREF;
  815         if (hot) {
  816                 pg->pqflags |= PQ_TEST;
  817         }
  818         s->s_ncold++;
  819         clockpro_clearreferencebit(pg, false);
  820         clockpro___enqueuetail(pg);
  821 #else /* defined(USEONCE2) */
  822         if (speculative) {
  823                 s->s_ncold++;
  824         } else if (hot) {
  825                 pg->pqflags |= PQ_HOT;
  826         } else {
  827                 pg->pqflags |= PQ_TEST;
  828                 s->s_ncold++;
  829         }
  830         clockpro___enqueuetail(pg);
  831 #endif /* defined(USEONCE2) */
  832         KASSERT(s->s_ncold <= s->s_npages);
  833 }
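
       /*
        * in the default !USEONCE2 configuration the classification above
        * is: speculative pages enter cold with no test period, pages found
        * in the non-resident hash re-enter as hot (and grow the cold
        * target), and everything else enters cold with PQ_TEST set to
        * begin a test period.
        */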
  834 
  835 static pageq_t *
  836 clockpro_pagequeue(struct vm_page *pg)
  837 {
  838         struct clockpro_state * const s = &clockpro;
  839         int qidx;
  840 
  841         KASSERT(mutex_owned(&s->lock));
  842 
  843         qidx = clockpro_getq(pg);
  844         KASSERT(qidx != CLOCKPRO_NOQUEUE);
  845 
  846         return clockpro_queue(s, qidx);
  847 }
  848 
  849 static void
  850 clockpro_pagedequeue(struct vm_page *pg)
  851 {
  852         struct clockpro_state * const s = &clockpro;
  853         pageq_t *q;
  854 
  855         KASSERT(mutex_owned(&s->lock));
  856 
  857         KASSERT(s->s_npages > 0);
  858         check_sanity();
  859         q = clockpro_pagequeue(pg);
  860         pageq_remove(q, pg);
  861         check_sanity();
  862         clockpro_setq(pg, CLOCKPRO_NOQUEUE);
  863         if ((pg->pqflags & PQ_HOT) == 0) {
  864                 KASSERT(s->s_ncold > 0);
  865                 s->s_ncold--;
  866         }
  867         KASSERT(s->s_npages > 0);
  868         s->s_npages--;
  869         check_sanity();
  870 }
  871 
  872 static void
  873 clockpro_pagerequeue(struct vm_page *pg)
  874 {
  875         struct clockpro_state * const s = &clockpro;
  876         int qidx;
  877 
  878         KASSERT(mutex_owned(&s->lock));
  879 
  880         qidx = clockpro_getq(pg);
  881         KASSERT(qidx == CLOCKPRO_HOTQ || qidx == CLOCKPRO_COLDQ);
  882         pageq_remove(clockpro_queue(s, qidx), pg);
  883         check_sanity();
  884         clockpro_setq(pg, CLOCKPRO_NOQUEUE);
  885 
  886         clockpro___enqueuetail(pg);
  887 }
  888 
  889 static void
  890 handhot_endtest(struct vm_page *pg)
  891 {
  892 
  893         KASSERT(mutex_owned(&clockpro.lock));
  894 
  895         KASSERT((pg->pqflags & PQ_HOT) == 0);
  896         if ((pg->pqflags & PQ_TEST) != 0) {
  897                 PDPOL_EVCNT_INCR(hhotcoldtest);
  898                 COLDTARGET_ADJ(-1);
  899                 pg->pqflags &= ~PQ_TEST;
  900         } else {
  901                 PDPOL_EVCNT_INCR(hhotcold);
  902         }
  903 }
  904 
  905 static void
  906 handhot_advance(void)
  907 {
  908         struct clockpro_state * const s = &clockpro;
  909         struct vm_page *pg;
  910         pageq_t *hotq;
  911         int hotqlen;
  912 
  913         KASSERT(mutex_owned(&s->lock));
  914 
  915         clockpro_tune();
  916 
  917         dump("hot called");
  918         if (s->s_ncold >= s->s_coldtarget) {
  919                 return;
  920         }
  921         hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
  922 again:
  923         pg = pageq_first(hotq);
  924         if (pg == NULL) {
  925                 DPRINTF("%s: HHOT TAKEOVER\n", __func__);
  926                 dump("hhottakeover");
  927                 PDPOL_EVCNT_INCR(hhottakeover);
  928 #if defined(LISTQ)
  929                 while (/* CONSTCOND */ 1) {
  930                         pageq_t *coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
  931 
  932                         pg = pageq_first(coldq);
  933                         if (pg == NULL) {
  934                                 clockpro_newqflushone();
  935                                 pg = pageq_first(coldq);
  936                                 if (pg == NULL) {
  937                                         WARN("hhot: no page?\n");
  938                                         return;
  939                                 }
  940                         }
  941                         KASSERT(clockpro_pagequeue(pg) == coldq);
  942                         pageq_remove(coldq, pg);
  943                         check_sanity();
  944                         if ((pg->pqflags & PQ_HOT) == 0) {
  945                                 handhot_endtest(pg);
  946                                 clockpro_insert_tail(s, CLOCKPRO_LISTQ, pg);
  947                         } else {
  948                                 clockpro_insert_head(s, CLOCKPRO_HOTQ, pg);
  949                                 break;
  950                         }
  951                 }
  952 #else /* defined(LISTQ) */
  953                 clockpro_newqflush(0); /* XXX XXX */
  954                 clockpro_switchqueue();
  955                 hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
  956                 goto again;
  957 #endif /* defined(LISTQ) */
  958         }
  959 
  960         KASSERT(clockpro_pagequeue(pg) == hotq);
  961 
  962         /*
  963          * terminate test period of nonresident pages by cycling them.
  964          */
  965 
  966         cycle_target_frac += BUCKETSIZE;
  967         hotqlen = pageq_len(hotq);
  968         while (cycle_target_frac >= hotqlen) {
  969                 cycle_target++;
  970                 cycle_target_frac -= hotqlen;
  971         }
  972 
  973         if ((pg->pqflags & PQ_HOT) == 0) {
  974 #if defined(LISTQ)
  975                 panic("cold page in hotq: %p", pg);
  976 #else /* defined(LISTQ) */
  977                 handhot_endtest(pg);
  978                 goto next;
  979 #endif /* defined(LISTQ) */
  980         }
  981         KASSERT((pg->pqflags & PQ_TEST) == 0);
  982         KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
  983         KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
  984 
  985         /*
   986          * once we have met our target,
  987          * stop at a hot page so that no cold pages in test period
  988          * have larger recency than any hot pages.
  989          */
  990 
  991         if (s->s_ncold >= s->s_coldtarget) {
  992                 dump("hot done");
  993                 return;
  994         }
  995         clockpro_movereferencebit(pg, false);
  996         if ((pg->pqflags & PQ_REFERENCED) == 0) {
  997                 PDPOL_EVCNT_INCR(hhotunref);
  998                 uvmexp.pddeact++;
  999                 pg->pqflags &= ~PQ_HOT;
 1000                 clockpro.s_ncold++;
 1001                 KASSERT(s->s_ncold <= s->s_npages);
 1002         } else {
 1003                 PDPOL_EVCNT_INCR(hhotref);
 1004         }
 1005         pg->pqflags &= ~PQ_REFERENCED;
 1006 #if !defined(LISTQ)
 1007 next:
 1008 #endif /* !defined(LISTQ) */
 1009         clockpro_pagerequeue(pg);
 1010         dump("hot");
 1011         goto again;
 1012 }
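
       /*
        * in short, hand-hot keeps scanning until enough cold pages exist
        * (s_ncold >= s_coldtarget): unreferenced hot pages are demoted to
        * cold, referenced ones stay hot, and either way the page is
        * requeued at the tail.  as a side effect the hand also advances
        * cycle_target, which ages the non-resident cookies.
        */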
 1013 
 1014 static struct vm_page *
 1015 handcold_advance(void)
 1016 {
 1017         struct clockpro_state * const s = &clockpro;
 1018         struct vm_page *pg;
 1019 
 1020         KASSERT(mutex_owned(&s->lock));
 1021 
 1022         for (;;) {
 1023 #if defined(LISTQ)
 1024                 pageq_t *listq = clockpro_queue(s, CLOCKPRO_LISTQ);
 1025 #endif /* defined(LISTQ) */
 1026                 pageq_t *coldq;
 1027 
 1028                 clockpro_newqrotate();
 1029                 handhot_advance();
 1030 #if defined(LISTQ)
 1031                 pg = pageq_first(listq);
 1032                 if (pg != NULL) {
 1033                         KASSERT(clockpro_getq(pg) == CLOCKPRO_LISTQ);
 1034                         KASSERT((pg->pqflags & PQ_TEST) == 0);
 1035                         KASSERT((pg->pqflags & PQ_HOT) == 0);
 1036                         KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
 1037                         pageq_remove(listq, pg);
 1038                         check_sanity();
 1039                         clockpro_insert_head(s, CLOCKPRO_COLDQ, pg); /* XXX */
 1040                         goto gotcold;
 1041                 }
 1042 #endif /* defined(LISTQ) */
 1043                 check_sanity();
 1044                 coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
 1045                 pg = pageq_first(coldq);
 1046                 if (pg == NULL) {
 1047                         clockpro_newqflushone();
 1048                         pg = pageq_first(coldq);
 1049                 }
 1050                 if (pg == NULL) {
 1051                         DPRINTF("%s: HCOLD TAKEOVER\n", __func__);
 1052                         dump("hcoldtakeover");
 1053                         PDPOL_EVCNT_INCR(hcoldtakeover);
 1054                         KASSERT(
 1055                             pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) == 0);
 1056 #if defined(LISTQ)
 1057                         KASSERT(
 1058                             pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)) == 0);
 1059 #else /* defined(LISTQ) */
 1060                         clockpro_switchqueue();
 1061                         coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
 1062                         pg = pageq_first(coldq);
 1063 #endif /* defined(LISTQ) */
 1064                 }
 1065                 if (pg == NULL) {
 1066                         WARN("hcold: no page?\n");
 1067                         return NULL;
 1068                 }
 1069                 KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
 1070                 if ((pg->pqflags & PQ_HOT) != 0) {
 1071                         PDPOL_EVCNT_INCR(hcoldhot);
 1072                         pageq_remove(coldq, pg);
 1073                         clockpro_insert_tail(s, CLOCKPRO_HOTQ, pg);
 1074                         check_sanity();
 1075                         KASSERT((pg->pqflags & PQ_TEST) == 0);
 1076                         uvmexp.pdscans++;
 1077                         continue;
 1078                 }
 1079 #if defined(LISTQ)
 1080 gotcold:
 1081 #endif /* defined(LISTQ) */
 1082                 KASSERT((pg->pqflags & PQ_HOT) == 0);
 1083                 uvmexp.pdscans++;
 1084                 clockpro_movereferencebit(pg, false);
 1085                 if ((pg->pqflags & PQ_SPECULATIVE) != 0) {
 1086                         KASSERT((pg->pqflags & PQ_TEST) == 0);
 1087                         if ((pg->pqflags & PQ_REFERENCED) != 0) {
 1088                                 PDPOL_EVCNT_INCR(speculativehit2);
 1089                                 pg->pqflags &= ~(PQ_SPECULATIVE|PQ_REFERENCED);
 1090                                 clockpro_pagedequeue(pg);
 1091                                 clockpro_pageenqueue(pg);
 1092                                 continue;
 1093                         }
 1094                         PDPOL_EVCNT_INCR(speculativemiss);
 1095                 }
 1096                 switch (pg->pqflags & (PQ_REFERENCED|PQ_TEST)) {
 1097                 case PQ_TEST:
 1098                         PDPOL_EVCNT_INCR(hcoldunreftest);
 1099                         nonresident_pagerecord(pg);
 1100                         goto gotit;
 1101                 case 0:
 1102                         PDPOL_EVCNT_INCR(hcoldunref);
 1103 gotit:
 1104                         KASSERT(s->s_ncold > 0);
 1105                         clockpro_pagerequeue(pg); /* XXX */
 1106                         dump("cold done");
 1107                         /* XXX "pg" is still in queue */
 1108                         handhot_advance();
 1109                         goto done;
 1110 
 1111                 case PQ_REFERENCED|PQ_TEST:
 1112                         PDPOL_EVCNT_INCR(hcoldreftest);
 1113                         s->s_ncold--;
 1114                         COLDTARGET_ADJ(1);
 1115                         pg->pqflags |= PQ_HOT;
 1116                         pg->pqflags &= ~PQ_TEST;
 1117                         break;
 1118 
 1119                 case PQ_REFERENCED:
 1120                         PDPOL_EVCNT_INCR(hcoldref);
 1121                         pg->pqflags |= PQ_TEST;
 1122                         break;
 1123                 }
 1124                 pg->pqflags &= ~PQ_REFERENCED;
 1125                 uvmexp.pdreact++;
 1126                 /* move to the list head */
 1127                 clockpro_pagerequeue(pg);
 1128                 dump("cold");
 1129         }
 1130 done:;
 1131         return pg;
 1132 }
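
       /*
        * handcold_advance() returns the next eviction candidate.  the
        * switch on (PQ_REFERENCED|PQ_TEST) above is the per-page state
        * machine for a cold page:
        *
        *      REFERENCED|TEST -> promote to hot and grow the cold target
        *      REFERENCED      -> stay cold and begin a test period
        *      TEST            -> evict and record in the non-resident hash
        *      neither         -> evict
        */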
 1133 
 1134 static void
 1135 uvmpdpol_pageactivate_locked(struct vm_page *pg)
 1136 {
 1137 
 1138         if (!uvmpdpol_pageisqueued_p(pg)) {
 1139                 KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
 1140                 pg->pqflags |= PQ_INITIALREF;
 1141                 clockpro_pageenqueue(pg);
 1142         } else if ((pg->pqflags & PQ_SPECULATIVE)) {
 1143                 PDPOL_EVCNT_INCR(speculativehit1);
 1144                 pg->pqflags &= ~PQ_SPECULATIVE;
 1145                 pg->pqflags |= PQ_INITIALREF;
 1146                 clockpro_pagedequeue(pg);
 1147                 clockpro_pageenqueue(pg);
 1148         }
 1149         pg->pqflags |= PQ_REFERENCED;
 1150 }
 1151 
 1152 void
 1153 uvmpdpol_pageactivate(struct vm_page *pg)
 1154 {
 1155 
 1156         uvmpdpol_set_intent(pg, PQ_INTENT_A);
 1157 }
 1158 
 1159 static void
 1160 uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
 1161 {
 1162 
 1163         clockpro_clearreferencebit(pg, true);
 1164 }
 1165 
 1166 void
 1167 uvmpdpol_pagedeactivate(struct vm_page *pg)
 1168 {
 1169 
 1170         uvmpdpol_set_intent(pg, PQ_INTENT_I);
 1171 }
 1172 
 1173 static void
 1174 uvmpdpol_pagedequeue_locked(struct vm_page *pg)
 1175 {
 1176 
 1177         if (!uvmpdpol_pageisqueued_p(pg)) {
 1178                 return;
 1179         }
 1180         clockpro_pagedequeue(pg);
 1181         pg->pqflags &= ~(PQ_INITIALREF|PQ_SPECULATIVE);
 1182 }
 1183 
 1184 void
 1185 uvmpdpol_pagedequeue(struct vm_page *pg)
 1186 {
 1187 
 1188         uvmpdpol_set_intent(pg, PQ_INTENT_D);
 1189 }
 1190 
 1191 static void
 1192 uvmpdpol_pageenqueue_locked(struct vm_page *pg)
 1193 {
 1194 
 1195 #if 1
 1196         if (uvmpdpol_pageisqueued_p(pg)) {
 1197                 return;
 1198         }
 1199         clockpro_clearreferencebit(pg, true);
 1200         pg->pqflags |= PQ_SPECULATIVE;
 1201         clockpro_pageenqueue(pg);
 1202 #else
 1203         uvmpdpol_pageactivate_locked(pg);
 1204 #endif
 1205 }
 1206 
 1207 void
 1208 uvmpdpol_pageenqueue(struct vm_page *pg)
 1209 {
 1210 
 1211         uvmpdpol_set_intent(pg, PQ_INTENT_D);
 1212 }
 1213 
 1214 static bool
 1215 uvmpdpol_pagerealize_locked(struct vm_page *pg)
 1216 {
 1217         uint32_t pqflags;
 1218 
 1219         KASSERT(mutex_owned(&clockpro.lock));
 1220         KASSERT(mutex_owned(&pg->interlock));
 1221 
 1222         /* XXX this needs to be called from elsewhere, like uvmpdpol_clock. */
 1223 
 1224         pqflags = pg->pqflags;
  1225                 pg->pqflags &= ~(PQ_INTENT_SET | PQ_INTENT_QUEUED);
 1226         switch (pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) {
 1227         case PQ_INTENT_A | PQ_INTENT_SET:
 1228                 uvmpdpol_pageactivate_locked(pg);
 1229                 return true;
 1230         case PQ_INTENT_E | PQ_INTENT_SET:
 1231                 uvmpdpol_pageenqueue_locked(pg);
 1232                 return true;
 1233         case PQ_INTENT_I | PQ_INTENT_SET:
 1234                 uvmpdpol_pagedeactivate_locked(pg);
 1235                 return true;
 1236         case PQ_INTENT_D | PQ_INTENT_SET:
 1237                 uvmpdpol_pagedequeue_locked(pg);
 1238                 return true;
 1239         default:
 1240                 return false;
 1241         }
 1242 }
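
       /*
        * the uvmpdpol_pageactivate/pagedeactivate/pageenqueue/pagedequeue
        * entry points above only record an intent on the page; the queues
        * themselves are updated later, with clockpro.lock and the page
        * interlock held, when uvmpdpol_pagerealize() is called for the
        * page.
        */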
 1243 
 1244 void
 1245 uvmpdpol_pagerealize(struct vm_page *pg)
 1246 {
 1247         struct clockpro_state * const s = &clockpro;
 1248 
 1249         mutex_enter(&s->lock);
 1250         uvmpdpol_pagerealize_locked(pg);
 1251         mutex_exit(&s->lock);
 1252 }
 1253 
 1254 void
 1255 uvmpdpol_anfree(struct vm_anon *an)
 1256 {
 1257         struct clockpro_state * const s = &clockpro;
 1258 
 1259         KASSERT(an->an_page == NULL);
 1260         mutex_enter(&s->lock);
 1261         if (nonresident_lookupremove((objid_t)an, 0)) {
 1262                 PDPOL_EVCNT_INCR(nresanonfree);
 1263         }
 1264         mutex_exit(&s->lock);
 1265 }
 1266 
 1267 void
 1268 uvmpdpol_init(void)
 1269 {
 1270 
 1271         clockpro_init();
 1272 }
 1273 
 1274 void
 1275 uvmpdpol_reinit(void)
 1276 {
 1277         struct clockpro_state * const s = &clockpro;
 1278 
 1279         mutex_enter(&s->lock);
 1280         clockpro_reinit();
 1281         mutex_exit(&s->lock);
 1282 }
 1283 
 1284 void
 1285 uvmpdpol_estimatepageable(int *active, int *inactive)
 1286 {
 1287         struct clockpro_state * const s = &clockpro;
 1288 
 1289         /*
 1290          * Don't take any locks here.  This can be called from DDB, and in
 1291          * any case the numbers are stale the instant the lock is dropped,
 1292          * so it just doesn't matter.
 1293          */
 1294         if (active) {
 1295                 *active = s->s_npages - s->s_ncold;
 1296         }
 1297         if (inactive) {
 1298                 *inactive = s->s_ncold;
 1299         }
 1300 }
 1301 
 1302 bool
 1303 uvmpdpol_pageisqueued_p(struct vm_page *pg)
 1304 {
 1305 
 1306         /* Unlocked check OK due to page lifecycle. */
 1307         return clockpro_getq(pg) != CLOCKPRO_NOQUEUE;
 1308 }
 1309 
 1310 bool
 1311 uvmpdpol_pageactivate_p(struct vm_page *pg)
 1312 {
 1313 
 1314         /* For now, no heuristic, always receive activations. */
 1315         return true;
 1316 }
 1317 
 1318 void
 1319 uvmpdpol_scaninit(void)
 1320 {
 1321         struct clockpro_state * const s = &clockpro;
 1322         struct clockpro_scanstate * const ss = &scanstate;
 1323 
 1324         mutex_enter(&s->lock);
 1325         ss->ss_nscanned = 0;
 1326         mutex_exit(&s->lock);
 1327 }
 1328 
 1329 void
 1330 uvmpdpol_scanfini(void)
 1331 {
 1332 
 1333 }
 1334 
 1335 struct vm_page *
 1336 uvmpdpol_selectvictim(kmutex_t **plock)
 1337 {
 1338         struct clockpro_state * const s = &clockpro;
 1339         struct clockpro_scanstate * const ss = &scanstate;
 1340         struct vm_page *pg;
 1341         kmutex_t *lock = NULL;
 1342 
 1343         do {
 1344                 mutex_enter(&s->lock);
 1345                 if (ss->ss_nscanned > s->s_npages) {
 1346                         DPRINTF("scan too much\n");
 1347                         mutex_exit(&s->lock);
 1348                         return NULL;
 1349                 }
 1350                 pg = handcold_advance();
 1351                 if (pg == NULL) {
 1352                         mutex_exit(&s->lock);
 1353                         break;
 1354                 }
 1355                 ss->ss_nscanned++;
 1356                 /*
 1357                  * acquire interlock to stabilize page identity.
 1358                  * if we have caught the page in a state of flux
 1359                  * and it should be dequeued, do it now and then
 1360                  * move on to the next.
 1361                  */
 1362                 mutex_enter(&pg->interlock);
 1363                 if ((pg->uobject == NULL && pg->uanon == NULL) ||
 1364                     pg->wire_count > 0) {
 1365                         mutex_exit(&pg->interlock);
 1366                         clockpro_pagedequeue(pg);
 1367                         pg->pqflags &= ~(PQ_INITIALREF|PQ_SPECULATIVE);
 1368                         continue;
 1369                 }
 1370                 mutex_exit(&s->lock);
 1371                 lock = uvmpd_trylockowner(pg);
 1372                 /* pg->interlock now dropped */
 1373         } while (lock == NULL);
 1374         *plock = lock;
 1375         return pg;
 1376 }
 1377 
 1378 static void
 1379 clockpro_dropswap(pageq_t *q, int *todo)
 1380 {
 1381         struct vm_page *pg;
 1382         kmutex_t *lock;
 1383 
 1384         KASSERT(mutex_owned(&clockpro.lock));
 1385 
 1386         TAILQ_FOREACH_REVERSE(pg, &q->q_q, pglist, pdqueue) {
 1387                 if (*todo <= 0) {
 1388                         break;
 1389                 }
 1390                 if ((pg->pqflags & PQ_HOT) == 0) {
 1391                         continue;
 1392                 }
 1393                 mutex_enter(&pg->interlock);
 1394                 if ((pg->flags & PG_SWAPBACKED) == 0) {
 1395                         mutex_exit(&pg->interlock);
 1396                         continue;
 1397                 }
 1398 
 1399                 /*
 1400                  * try to lock the object that owns the page.
 1401                  */
 1402                 mutex_exit(&clockpro.lock);
 1403                 lock = uvmpd_trylockowner(pg);
 1404                 /* pg->interlock now released */
 1405                 mutex_enter(&clockpro.lock);
 1406                 if (lock == NULL) {
 1407                         /* didn't get it - try the next page. */
 1408                         /* XXXAD lost position in queue */
 1409                         continue;
 1410                 }
 1411 
 1412                 /*
 1413                  * if there's a shortage of swap slots, try to free it.
 1414                  */
 1415                 if ((pg->flags & PG_SWAPBACKED) != 0 &&
 1416                     (pg->flags & PG_BUSY) == 0) {
 1417                         if (uvmpd_dropswap(pg)) {
 1418                                 (*todo)--;
 1419                         }
 1420                 }
 1421                 mutex_exit(lock);
 1422         }
 1423 }
 1424 
 1425 void
 1426 uvmpdpol_balancequeue(int swap_shortage)
 1427 {
 1428         struct clockpro_state * const s = &clockpro;
 1429         int todo = swap_shortage;
 1430 
 1431         if (todo == 0) {
 1432                 return;
 1433         }
 1434 
 1435         /*
 1436          * reclaim swap slots from hot pages
 1437          */
 1438 
 1439         DPRINTF("%s: swap_shortage=%d\n", __func__, swap_shortage);
 1440 
 1441         mutex_enter(&s->lock);
 1442         clockpro_dropswap(clockpro_queue(s, CLOCKPRO_NEWQ), &todo);
 1443         clockpro_dropswap(clockpro_queue(s, CLOCKPRO_COLDQ), &todo);
 1444         clockpro_dropswap(clockpro_queue(s, CLOCKPRO_HOTQ), &todo);
 1445         mutex_exit(&s->lock);
 1446 
 1447         DPRINTF("%s: done=%d\n", __func__, swap_shortage - todo);
 1448 }
 1449 
 1450 bool
 1451 uvmpdpol_needsscan_p(void)
 1452 {
 1453         struct clockpro_state * const s = &clockpro;
 1454 
 1455         /* This must be an unlocked check: can be called from interrupt. */
 1456         return s->s_ncold < s->s_coldtarget;
 1457 }
 1458 
 1459 void
 1460 uvmpdpol_tune(void)
 1461 {
 1462         struct clockpro_state * const s = &clockpro;
 1463 
 1464         mutex_enter(&s->lock);
 1465         clockpro_tune();
 1466         mutex_exit(&s->lock);
 1467 }
 1468 
 1469 void
 1470 uvmpdpol_idle(void)
 1471 {
 1472 
 1473 }
 1474 
 1475 #if !defined(PDSIM)
 1476 
 1477 #include <sys/sysctl.h> /* XXX SYSCTL_DESCR */
 1478 
 1479 void
 1480 uvmpdpol_sysctlsetup(void)
 1481 {
 1482 #if !defined(ADAPTIVE)
 1483         struct clockpro_state * const s = &clockpro;
 1484 
 1485         uvm_pctparam_createsysctlnode(&s->s_coldtargetpct, "coldtargetpct",
  1486             SYSCTL_DESCR("Cold target queue size as a percentage of the entire queue"));
 1487 #endif /* !defined(ADAPTIVE) */
 1488 }
 1489 
 1490 #endif /* !defined(PDSIM) */
 1491 
 1492 #if defined(DDB)
 1493 
 1494 #if 0 /* XXXuvmplock */
 1495 #define _pmap_is_referenced(pg) pmap_is_referenced(pg)
 1496 #else
 1497 #define _pmap_is_referenced(pg) false
 1498 #endif
 1499 
 1500 void clockpro_dump(void);
 1501 
 1502 void
 1503 clockpro_dump(void)
 1504 {
 1505         struct clockpro_state * const s = &clockpro;
 1506 
 1507         struct vm_page *pg;
 1508         int ncold, nhot, ntest, nspeculative, ninitialref, nref;
 1509         int newqlen, coldqlen, hotqlen, listqlen;
 1510 
 1511         newqlen = coldqlen = hotqlen = listqlen = 0;
 1512         printf("npages=%d, ncold=%d, coldtarget=%d, newqlenmax=%d\n",
 1513             s->s_npages, s->s_ncold, s->s_coldtarget, s->s_newqlenmax);
 1514 
 1515 #define INITCOUNT()     \
 1516         ncold = nhot = ntest = nspeculative = ninitialref = nref = 0
 1517 
 1518 #define COUNT(pg)       \
 1519         if ((pg->pqflags & PQ_HOT) != 0) { \
 1520                 nhot++; \
 1521         } else { \
 1522                 ncold++; \
 1523                 if ((pg->pqflags & PQ_TEST) != 0) { \
 1524                         ntest++; \
 1525                 } \
 1526                 if ((pg->pqflags & PQ_SPECULATIVE) != 0) { \
 1527                         nspeculative++; \
 1528                 } \
 1529                 if ((pg->pqflags & PQ_INITIALREF) != 0) { \
 1530                         ninitialref++; \
 1531                 } else if ((pg->pqflags & PQ_REFERENCED) != 0 || \
 1532                     _pmap_is_referenced(pg)) { \
 1533                         nref++; \
 1534                 } \
 1535         }
 1536 
 1537 #define PRINTCOUNT(name)        \
 1538         printf("%s hot=%d, cold=%d, test=%d, speculative=%d, initialref=%d, " \
 1539             "nref=%d\n", \
 1540             (name), nhot, ncold, ntest, nspeculative, ninitialref, nref)
 1541 
 1542         INITCOUNT();
 1543         TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_NEWQ)->q_q, pdqueue) {
 1544                 if (clockpro_getq(pg) != CLOCKPRO_NEWQ) {
 1545                         printf("newq corrupt %p\n", pg);
 1546                 }
 1547                 COUNT(pg)
 1548                 newqlen++;
 1549         }
 1550         PRINTCOUNT("newq");
 1551 
 1552         INITCOUNT();
 1553         TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_COLDQ)->q_q, pdqueue) {
 1554                 if (clockpro_getq(pg) != CLOCKPRO_COLDQ) {
 1555                         printf("coldq corrupt %p\n", pg);
 1556                 }
 1557                 COUNT(pg)
 1558                 coldqlen++;
 1559         }
 1560         PRINTCOUNT("coldq");
 1561 
 1562         INITCOUNT();
 1563         TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_HOTQ)->q_q, pdqueue) {
 1564                 if (clockpro_getq(pg) != CLOCKPRO_HOTQ) {
 1565                         printf("hotq corrupt %p\n", pg);
 1566                 }
 1567 #if defined(LISTQ)
 1568                 if ((pg->pqflags & PQ_HOT) == 0) {
 1569                         printf("cold page in hotq: %p\n", pg);
 1570                 }
 1571 #endif /* defined(LISTQ) */
 1572                 COUNT(pg)
 1573                 hotqlen++;
 1574         }
 1575         PRINTCOUNT("hotq");
 1576 
 1577         INITCOUNT();
 1578         TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_LISTQ)->q_q, pdqueue) {
 1579 #if !defined(LISTQ)
 1580                 printf("listq %p\n", pg);
 1581 #endif /* !defined(LISTQ) */
 1582                 if (clockpro_getq(pg) != CLOCKPRO_LISTQ) {
 1583                         printf("listq corrupt %p\n", pg);
 1584                 }
 1585                 COUNT(pg)
 1586                 listqlen++;
 1587         }
 1588         PRINTCOUNT("listq");
 1589 
 1590         printf("newqlen=%d/%d, coldqlen=%d/%d, hotqlen=%d/%d, listqlen=%d/%d\n",
 1591             newqlen, pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)),
 1592             coldqlen, pageq_len(clockpro_queue(s, CLOCKPRO_COLDQ)),
 1593             hotqlen, pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)),
 1594             listqlen, pageq_len(clockpro_queue(s, CLOCKPRO_LISTQ)));
 1595 }
 1596 
 1597 #endif /* defined(DDB) */
 1598 
 1599 #if defined(PDSIM)
 1600 #if defined(DEBUG)
 1601 static void
 1602 pdsim_dumpq(int qidx)
 1603 {
 1604         struct clockpro_state * const s = &clockpro;
 1605         pageq_t *q = clockpro_queue(s, qidx);
 1606         struct vm_page *pg;
 1607 
 1608         TAILQ_FOREACH(pg, &q->q_q, pdqueue) {
 1609                 DPRINTF(" %" PRIu64 "%s%s%s%s%s%s",
 1610                     pg->offset >> PAGE_SHIFT,
 1611                     (pg->pqflags & PQ_HOT) ? "H" : "",
 1612                     (pg->pqflags & PQ_TEST) ? "T" : "",
 1613                     (pg->pqflags & PQ_REFERENCED) ? "R" : "",
 1614                     _pmap_is_referenced(pg) ? "r" : "",
 1615                     (pg->pqflags & PQ_INITIALREF) ? "I" : "",
 1616                     (pg->pqflags & PQ_SPECULATIVE) ? "S" : ""
 1617                     );
 1618         }
 1619 }
 1620 #endif /* defined(DEBUG) */
 1621 
 1622 void
 1623 pdsim_dump(const char *id)
 1624 {
 1625 #if defined(DEBUG)
 1626         struct clockpro_state * const s = &clockpro;
 1627 
 1628         DPRINTF("  %s L(", id);
 1629         pdsim_dumpq(CLOCKPRO_LISTQ);
 1630         DPRINTF(" ) H(");
 1631         pdsim_dumpq(CLOCKPRO_HOTQ);
 1632         DPRINTF(" ) C(");
 1633         pdsim_dumpq(CLOCKPRO_COLDQ);
 1634         DPRINTF(" ) N(");
 1635         pdsim_dumpq(CLOCKPRO_NEWQ);
 1636         DPRINTF(" ) ncold=%d/%d, coldadj=%d\n",
 1637             s->s_ncold, s->s_coldtarget, coldadj);
 1638 #endif /* defined(DEBUG) */
 1639 }
 1640 #endif /* defined(PDSIM) */



This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.