/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.40 2022/04/12 20:27:56 andvar Exp $	*/
/*	NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $	*/

/*-
 * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1997 Charles D. Cranor and Washington University.
 * Copyright (c) 1991, 1993, The Regents of the University of California.
 *
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_pageout.c	8.5 (Berkeley) 2/14/94
 * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#if defined(PDSIM)

#include "pdsim.h"

#else /* defined(PDSIM) */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.40 2022/04/12 20:27:56 andvar Exp $");

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/atomic.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pdpolicy_impl.h>
#include <uvm/uvm_stat.h>

#endif /* defined(PDSIM) */
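
/*
 * this file implements the "clock" page replacement policy, a two-queue
 * approximation of LRU: pages in use sit on the active queue and reclaim
 * candidates on the inactive queue.  uvmpdpol_selectvictim() walks the
 * inactive queue handing victims to the pagedaemon, while
 * uvmpdpol_balancequeue() deactivates pages to keep the inactive queue
 * at its target size.  to reduce traffic on the single global lock,
 * queue changes are first recorded on pages as "intents" and batched
 * through small per-CPU queues before being applied globally.
 */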

/*
 * per-CPU queue of pending page status changes.  128 entries makes for a
 * 1kB queue on _LP64 and has been found to be a reasonable compromise that
 * keeps lock contention events and wait times low, while not using too much
 * memory nor allowing global state to fall too far behind.
 */
#if !defined(CLOCK_PDQ_SIZE)
#define CLOCK_PDQ_SIZE	128
#endif /* !defined(CLOCK_PDQ_SIZE) */

#define PQ_INACTIVE	0x00000010	/* page is in inactive list */
#define PQ_ACTIVE	0x00000020	/* page is in active list */
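
/*
 * these bits live in pg->pqflags together with the PQ_INTENT_* bits.
 * the queue bits change only with both pg->interlock and the global
 * policy lock held; intents are set with just pg->interlock held.
 * unlocked readers use atomic_load_relaxed() and treat the result as
 * a hint (see uvmpdpol_pageisqueued_p()).
 */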

#if !defined(CLOCK_INACTIVEPCT)
#define CLOCK_INACTIVEPCT	33
#endif /* !defined(CLOCK_INACTIVEPCT) */

struct uvmpdpol_globalstate {
	kmutex_t lock;			/* lock on state */
					/* <= compiler pads here */
	struct pglist s_activeq		/* allocated pages, in use */
	    __aligned(COHERENCY_UNIT);
	struct pglist s_inactiveq;	/* pages between the clock hands */
	int s_active;
	int s_inactive;
	int s_inactarg;
	struct uvm_pctparam s_anonmin;
	struct uvm_pctparam s_filemin;
	struct uvm_pctparam s_execmin;
	struct uvm_pctparam s_anonmax;
	struct uvm_pctparam s_filemax;
	struct uvm_pctparam s_execmax;
	struct uvm_pctparam s_inactivepct;
};

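/*
 * scan state: ss_marker is a dummy page (PG_MARKER) that sits in the
 * inactive queue during a scan, recording the clock hand's position so
 * the scan can drop the global lock between pages and resume where it
 * left off.
 */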
struct uvmpdpol_scanstate {
	bool ss_anonreact, ss_filereact, ss_execreact;
	struct vm_page ss_marker;
};

static void	uvmpdpol_pageactivate_locked(struct vm_page *);
static void	uvmpdpol_pagedeactivate_locked(struct vm_page *);
static void	uvmpdpol_pagedequeue_locked(struct vm_page *);
static bool	uvmpdpol_pagerealize_locked(struct vm_page *);
static struct uvm_cpu	*uvmpdpol_flush(void);

static struct uvmpdpol_globalstate pdpol_state __cacheline_aligned;
static struct uvmpdpol_scanstate pdpol_scanstate;

PDPOL_EVCNT_DEFINE(reactexec)
PDPOL_EVCNT_DEFINE(reactfile)
PDPOL_EVCNT_DEFINE(reactanon)

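/*
 * clock_tune: recompute the inactive queue target.  the target is
 * CLOCK_INACTIVEPCT percent of all pageable pages, but always at least
 * one page above the free target so the scanner has room to work.
 */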
static void
clock_tune(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	s->s_inactarg = UVM_PCTPARAM_APPLY(&s->s_inactivepct,
	    s->s_active + s->s_inactive);
	if (s->s_inactarg <= uvmexp.freetarg) {
		s->s_inactarg = uvmexp.freetarg + 1;
	}
}

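/*
 * uvmpdpol_scaninit: prepare for a pagedaemon scan pass.  decide which
 * page types should be reactivated rather than reclaimed, and plant the
 * scan marker at the head of the inactive queue.
 */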
void
uvmpdpol_scaninit(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
	int t;
	bool anonunder, fileunder, execunder;
	bool anonover, fileover, execover;
	bool anonreact, filereact, execreact;
	int64_t freepg, anonpg, filepg, execpg;

	/*
	 * decide which types of pages we want to reactivate instead of
	 * freeing to keep usage within the minimum and maximum usage
	 * limits.  uvm_availmem() will sync the counters.
	 */

	freepg = uvm_availmem(false);
	anonpg = cpu_count_get(CPU_COUNT_ANONCLEAN) +
	    cpu_count_get(CPU_COUNT_ANONDIRTY) +
	    cpu_count_get(CPU_COUNT_ANONUNKNOWN);
	execpg = cpu_count_get(CPU_COUNT_EXECPAGES);
	filepg = cpu_count_get(CPU_COUNT_FILECLEAN) +
	    cpu_count_get(CPU_COUNT_FILEDIRTY) +
	    cpu_count_get(CPU_COUNT_FILEUNKNOWN) -
	    execpg;

	mutex_enter(&s->lock);
	t = s->s_active + s->s_inactive + freepg;
	anonunder = anonpg <= UVM_PCTPARAM_APPLY(&s->s_anonmin, t);
	fileunder = filepg <= UVM_PCTPARAM_APPLY(&s->s_filemin, t);
	execunder = execpg <= UVM_PCTPARAM_APPLY(&s->s_execmin, t);
	anonover = anonpg > UVM_PCTPARAM_APPLY(&s->s_anonmax, t);
	fileover = filepg > UVM_PCTPARAM_APPLY(&s->s_filemax, t);
	execover = execpg > UVM_PCTPARAM_APPLY(&s->s_execmax, t);
	anonreact = anonunder || (!anonover && (fileover || execover));
	filereact = fileunder || (!fileover && (anonover || execover));
	execreact = execunder || (!execover && (anonover || fileover));
	if (filereact && execreact && (anonreact || uvm_swapisfull())) {
		anonreact = filereact = execreact = false;
	}
	ss->ss_anonreact = anonreact;
	ss->ss_filereact = filereact;
	ss->ss_execreact = execreact;
	memset(&ss->ss_marker, 0, sizeof(ss->ss_marker));
	ss->ss_marker.flags = PG_MARKER;
	TAILQ_INSERT_HEAD(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
	mutex_exit(&s->lock);
}

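/*
 * uvmpdpol_scanfini: end a scan pass, removing the marker inserted by
 * uvmpdpol_scaninit().
 */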
void
uvmpdpol_scanfini(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;

	mutex_enter(&s->lock);
	TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker, pdqueue);
	mutex_exit(&s->lock);
}

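/*
 * uvmpdpol_selectvictim: return the next reclaim candidate from the
 * inactive queue, with the lock on its owning object held (returned via
 * plock), or NULL when the end of the queue is reached.  pages that
 * should not be reclaimed yet are reactivated along the way.
 */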
struct vm_page *
uvmpdpol_selectvictim(krwlock_t **plock)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
	struct vm_page *pg;
	krwlock_t *lock;

	mutex_enter(&s->lock);
	while (/* CONSTCOND */ 1) {
		struct vm_anon *anon;
		struct uvm_object *uobj;

		pg = TAILQ_NEXT(&ss->ss_marker, pdqueue);
		if (pg == NULL) {
			break;
		}
		KASSERT((pg->flags & PG_MARKER) == 0);
		uvmexp.pdscans++;

		/*
		 * acquire interlock to stabilize page identity.
		 * if we have caught the page in a state of flux
		 * deal with it and retry.
		 */
		mutex_enter(&pg->interlock);
		if (uvmpdpol_pagerealize_locked(pg)) {
			mutex_exit(&pg->interlock);
			continue;
		}

		/*
		 * now prepare to move on to the next page.
		 */
		TAILQ_REMOVE(&pdpol_state.s_inactiveq, &ss->ss_marker,
		    pdqueue);
		TAILQ_INSERT_AFTER(&pdpol_state.s_inactiveq, pg,
		    &ss->ss_marker, pdqueue);

		/*
		 * enforce the minimum thresholds on different
		 * types of memory usage.  if reusing the current
		 * page would reduce that type of usage below its
		 * minimum, reactivate the page instead and move
		 * on to the next page.
		 */
		anon = pg->uanon;
		uobj = pg->uobject;
		if (uobj && UVM_OBJ_IS_VTEXT(uobj) && ss->ss_execreact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactexec);
			continue;
		}
		if (uobj && UVM_OBJ_IS_VNODE(uobj) &&
		    !UVM_OBJ_IS_VTEXT(uobj) && ss->ss_filereact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactfile);
			continue;
		}
		if ((anon || UVM_OBJ_IS_AOBJ(uobj)) && ss->ss_anonreact) {
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(reactanon);
			continue;
		}

		/*
		 * try to lock the object that owns the page.
		 *
		 * with the page interlock held, we can drop s->lock, which
		 * could otherwise serve as a barrier to us getting the
		 * object locked, because the owner of the object's lock may
		 * be blocked on s->lock (i.e. a deadlock).
		 *
		 * whatever happens, uvmpd_trylockowner() will release the
		 * interlock.  with the interlock dropped we can then
		 * re-acquire our own lock.  the order is:
		 *
		 *	object -> pdpol -> interlock.
		 */
		mutex_exit(&s->lock);
		lock = uvmpd_trylockowner(pg);
		/* pg->interlock now released */
		mutex_enter(&s->lock);
		if (lock == NULL) {
			/* didn't get it - try the next page. */
			continue;
		}

		/*
		 * move referenced pages back to active queue and skip to
		 * next page.
		 */
		if (pmap_is_referenced(pg)) {
			mutex_enter(&pg->interlock);
			uvmpdpol_pageactivate_locked(pg);
			mutex_exit(&pg->interlock);
			uvmexp.pdreact++;
			rw_exit(lock);
			continue;
		}

		/* we have a potential victim. */
		*plock = lock;
		break;
	}
	mutex_exit(&s->lock);
	return pg;
}

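/*
 * uvmpdpol_balancequeue: walk the active queue, deactivating pages until
 * the inactive target is met, and if swap space is short, releasing swap
 * slots from swap-backed pages along the way.
 */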
void
uvmpdpol_balancequeue(int swap_shortage)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	int inactive_shortage;
	struct vm_page *p, marker;
	krwlock_t *lock;

	/*
	 * we have done the scan to get free pages.  now we work on meeting
	 * our inactive target.
	 */

	memset(&marker, 0, sizeof(marker));
	marker.flags = PG_MARKER;

	mutex_enter(&s->lock);
	TAILQ_INSERT_HEAD(&pdpol_state.s_activeq, &marker, pdqueue);
	for (;;) {
		inactive_shortage =
		    pdpol_state.s_inactarg - pdpol_state.s_inactive;
		if (inactive_shortage <= 0 && swap_shortage <= 0) {
			break;
		}
		p = TAILQ_NEXT(&marker, pdqueue);
		if (p == NULL) {
			break;
		}
		KASSERT((p->flags & PG_MARKER) == 0);

		/*
		 * acquire interlock to stabilize page identity.
		 * if we have caught the page in a state of flux
		 * deal with it and retry.
		 */
		mutex_enter(&p->interlock);
		if (uvmpdpol_pagerealize_locked(p)) {
			mutex_exit(&p->interlock);
			continue;
		}

		/*
		 * now prepare to move on to the next page.
		 */
		TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
		TAILQ_INSERT_AFTER(&pdpol_state.s_activeq, p, &marker,
		    pdqueue);

		/*
		 * try to lock the object that owns the page.  see comments
		 * in uvmpdpol_selectvictim().
		 */
		mutex_exit(&s->lock);
		lock = uvmpd_trylockowner(p);
		/* p->interlock now released */
		mutex_enter(&s->lock);
		if (lock == NULL) {
			/* didn't get it - try the next page. */
			continue;
		}

		/*
		 * if there's a shortage of swap slots, try to free the
		 * swap space used by this page.
		 */
		if (swap_shortage > 0 && (p->flags & PG_SWAPBACKED) != 0 &&
		    (p->flags & PG_BUSY) == 0) {
			if (uvmpd_dropswap(p)) {
				swap_shortage--;
			}
		}

		/*
		 * if there's a shortage of inactive pages, deactivate.
		 */
		if (inactive_shortage > 0) {
			pmap_clear_reference(p);
			mutex_enter(&p->interlock);
			uvmpdpol_pagedeactivate_locked(p);
			mutex_exit(&p->interlock);
			uvmexp.pddeact++;
			inactive_shortage--;
		}
		rw_exit(lock);
	}
	TAILQ_REMOVE(&pdpol_state.s_activeq, &marker, pdqueue);
	mutex_exit(&s->lock);
}

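/*
 * uvmpdpol_pagedeactivate_locked: move a page to the tail of the inactive
 * queue.  called with both the global policy lock and pg->interlock held.
 */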
static void
uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));
	KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
	    (PQ_INTENT_D | PQ_INTENT_SET));

	if (pg->pqflags & PQ_ACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
		KASSERT(pdpol_state.s_active > 0);
		pdpol_state.s_active--;
	}
	if ((pg->pqflags & PQ_INACTIVE) == 0) {
		KASSERT(pg->wire_count == 0);
		TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pdqueue);
		pdpol_state.s_inactive++;
	}
	pg->pqflags &= ~(PQ_ACTIVE | PQ_INTENT_SET);
	pg->pqflags |= PQ_INACTIVE;
}

void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, false));
	KASSERT(mutex_owned(&pg->interlock));

	/*
	 * we have to clear the reference bit now, as when it comes time to
	 * realize the intent we won't have the object locked any more.
	 */
	pmap_clear_reference(pg);
	uvmpdpol_set_intent(pg, PQ_INTENT_I);
}

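/*
 * uvmpdpol_pageactivate_locked: move a page to the tail of the active
 * queue.  called with both the global policy lock and pg->interlock held.
 */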
static void
uvmpdpol_pageactivate_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));
	KASSERT((pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) !=
	    (PQ_INTENT_D | PQ_INTENT_SET));

	uvmpdpol_pagedequeue_locked(pg);
	TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
	pdpol_state.s_active++;
	pg->pqflags &= ~(PQ_INACTIVE | PQ_INTENT_SET);
	pg->pqflags |= PQ_ACTIVE;
}

void
uvmpdpol_pageactivate(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, false));
	KASSERT(mutex_owned(&pg->interlock));

	uvmpdpol_set_intent(pg, PQ_INTENT_A);
}

static void
uvmpdpol_pagedequeue_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));

	if (pg->pqflags & PQ_ACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
		KASSERT((pg->pqflags & PQ_INACTIVE) == 0);
		KASSERT(pdpol_state.s_active > 0);
		pdpol_state.s_active--;
	} else if (pg->pqflags & PQ_INACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pdqueue);
		KASSERT(pdpol_state.s_inactive > 0);
		pdpol_state.s_inactive--;
	}
	pg->pqflags &= ~(PQ_ACTIVE | PQ_INACTIVE | PQ_INTENT_SET);
}

void
uvmpdpol_pagedequeue(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, true));
	KASSERT(mutex_owned(&pg->interlock));

	uvmpdpol_set_intent(pg, PQ_INTENT_D);
}

void
uvmpdpol_pageenqueue(struct vm_page *pg)
{

	KASSERT(uvm_page_owner_locked_p(pg, false));
	KASSERT(mutex_owned(&pg->interlock));

	uvmpdpol_set_intent(pg, PQ_INTENT_E);
}

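/*
 * uvmpdpol_anfree: called when an anon is freed.  this policy keeps no
 * per-anon state, so there is nothing to do.
 */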
void
uvmpdpol_anfree(struct vm_anon *an)
{
}

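/*
 * uvmpdpol_pageisqueued_p: return true if the page is on, or is intended
 * to be placed on, a paging queue.
 */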
bool
uvmpdpol_pageisqueued_p(struct vm_page *pg)
{
	uint32_t pqflags;

	/*
	 * if there's an intent set, we have to consider it.  otherwise,
	 * return the actual state.  we may be called unlocked for the
	 * purpose of assertions, which is safe due to the page lifecycle.
	 */
	pqflags = atomic_load_relaxed(&pg->pqflags);
	if ((pqflags & PQ_INTENT_SET) != 0) {
		return (pqflags & PQ_INTENT_MASK) != PQ_INTENT_D;
	} else {
		return (pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
	}
}

bool
uvmpdpol_pageactivate_p(struct vm_page *pg)
{
	uint32_t pqflags;

	/* consider intent in preference to actual state. */
	pqflags = atomic_load_relaxed(&pg->pqflags);
	if ((pqflags & PQ_INTENT_SET) != 0) {
		pqflags &= PQ_INTENT_MASK;
		return pqflags != PQ_INTENT_A && pqflags != PQ_INTENT_E;
	} else {
		/*
		 * TODO: Enabling this may be too much of a big hammer,
		 * since we do get useful information from activations.
		 * Think about it more and maybe come up with a heuristic
		 * or something.
		 *
		 * return (pqflags & PQ_ACTIVE) == 0;
		 */
		return true;
	}
}


void
uvmpdpol_estimatepageable(int *active, int *inactive)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	/*
	 * Don't take any locks here.  This can be called from DDB, and in
	 * any case the numbers are stale the instant the lock is dropped,
	 * so it just doesn't matter.
	 */
	if (active) {
		*active = s->s_active;
	}
	if (inactive) {
		*inactive = s->s_inactive;
	}
}

#if !defined(PDSIM)
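/*
 * min_check: sysctl validation hook for the three reserve minimums.
 * reject a proposed value if anonmin + filemin + execmin would then
 * total more than 95%.
 */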
static int
min_check(struct uvm_pctparam *pct, int t)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;
	int total = t;

	if (pct != &s->s_anonmin) {
		total += uvm_pctparam_get(&s->s_anonmin);
	}
	if (pct != &s->s_filemin) {
		total += uvm_pctparam_get(&s->s_filemin);
	}
	if (pct != &s->s_execmin) {
		total += uvm_pctparam_get(&s->s_execmin);
	}
	if (total > 95) {
		return EINVAL;
	}
	return 0;
}
#endif /* !defined(PDSIM) */

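/*
 * uvmpdpol_init: set up the global state: the lock, the two queues, and
 * the default tuning percentages.
 */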
void
uvmpdpol_init(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	mutex_init(&s->lock, MUTEX_DEFAULT, IPL_NONE);
	TAILQ_INIT(&s->s_activeq);
	TAILQ_INIT(&s->s_inactiveq);
	uvm_pctparam_init(&s->s_inactivepct, CLOCK_INACTIVEPCT, NULL);
	uvm_pctparam_init(&s->s_anonmin, 10, min_check);
	uvm_pctparam_init(&s->s_filemin, 10, min_check);
	uvm_pctparam_init(&s->s_execmin, 5, min_check);
	uvm_pctparam_init(&s->s_anonmax, 80, NULL);
	uvm_pctparam_init(&s->s_filemax, 50, NULL);
	uvm_pctparam_init(&s->s_execmax, 30, NULL);
}

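/*
 * uvmpdpol_init_cpu: allocate this CPU's pending-update queue.  the queue
 * fills downward from the end of the array, so head == tail ==
 * CLOCK_PDQ_SIZE means empty.
 */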
void
uvmpdpol_init_cpu(struct uvm_cpu *ucpu)
{

	ucpu->pdq =
	    kmem_alloc(CLOCK_PDQ_SIZE * sizeof(struct vm_page *), KM_SLEEP);
	ucpu->pdqhead = CLOCK_PDQ_SIZE;
	ucpu->pdqtail = CLOCK_PDQ_SIZE;
}

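/*
 * uvmpdpol_reinit: hook for reinitializing the policy; nothing to do for
 * the clock policy.
 */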
void
uvmpdpol_reinit(void)
{
}

bool
uvmpdpol_needsscan_p(void)
{

	/*
	 * this must be an unlocked check: can be called from interrupt.
	 */
	return pdpol_state.s_inactive < pdpol_state.s_inactarg;
}

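/*
 * uvmpdpol_tune: recompute the scan targets under the global lock.
 */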
void
uvmpdpol_tune(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	mutex_enter(&s->lock);
	clock_tune();
	mutex_exit(&s->lock);
}

/*
 * uvmpdpol_pagerealize_locked: take the intended state set on a page and
 * make it real.  return true if any work was done.
 */
static bool
uvmpdpol_pagerealize_locked(struct vm_page *pg)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(mutex_owned(&pg->interlock));

	switch (pg->pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) {
	case PQ_INTENT_A | PQ_INTENT_SET:
	case PQ_INTENT_E | PQ_INTENT_SET:
		uvmpdpol_pageactivate_locked(pg);
		return true;
	case PQ_INTENT_I | PQ_INTENT_SET:
		uvmpdpol_pagedeactivate_locked(pg);
		return true;
	case PQ_INTENT_D | PQ_INTENT_SET:
		uvmpdpol_pagedequeue_locked(pg);
		return true;
	default:
		return false;
	}
}

716
717 /*
718 * uvmpdpol_flush: return the current uvm_cpu with all of its pending
719 * updates flushed to the global queues. this routine may block, and
720 * so can switch cpu. the idea is to empty to queue on whatever cpu
721 * we finally end up on.
722 */
static struct uvm_cpu *
uvmpdpol_flush(void)
{
	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
	struct uvm_cpu *ucpu;
	struct vm_page *pg;

	KASSERT(kpreempt_disabled());

	mutex_enter(&s->lock);
	for (;;) {
		/*
		 * prefer scanning forwards (even though mutex_enter() is
		 * serializing) so as to not defeat any prefetch logic in
		 * the CPU.  that means elsewhere enqueuing backwards, like
		 * a stack, but not so important there as pages are being
		 * added singly.
		 *
		 * prefetch the next "struct vm_page" while working on the
		 * current one.  this has a measurable and very positive
		 * effect in reducing the amount of time spent here under
		 * the global lock.
		 */
		ucpu = curcpu()->ci_data.cpu_uvm;
		KASSERT(ucpu->pdqhead <= ucpu->pdqtail);
		if (__predict_false(ucpu->pdqhead == ucpu->pdqtail)) {
			break;
		}
		pg = ucpu->pdq[ucpu->pdqhead++];
		if (__predict_true(ucpu->pdqhead != ucpu->pdqtail)) {
			__builtin_prefetch(ucpu->pdq[ucpu->pdqhead]);
		}
		mutex_enter(&pg->interlock);
		pg->pqflags &= ~PQ_INTENT_QUEUED;
		(void)uvmpdpol_pagerealize_locked(pg);
		mutex_exit(&pg->interlock);
	}
	mutex_exit(&s->lock);
	return ucpu;
}

763
764 /*
765 * uvmpdpol_pagerealize: realize any intent set on the page. in this
766 * implementation, that means putting the page on a per-CPU queue to be
767 * dealt with later.
768 */
769 void
770 uvmpdpol_pagerealize(struct vm_page *pg)
771 {
772 struct uvm_cpu *ucpu;
773
774 /*
775 * drain the per per-CPU queue if full, then enter the page.
776 */
777 kpreempt_disable();
778 ucpu = curcpu()->ci_data.cpu_uvm;
779 if (__predict_false(ucpu->pdqhead == 0)) {
780 ucpu = uvmpdpol_flush();
781 }
782 ucpu->pdq[--(ucpu->pdqhead)] = pg;
783 kpreempt_enable();
784 }
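/*
 * putting the pieces together, the life of a deferred update looks like:
 * uvmpdpol_pagedeactivate() records PQ_INTENT_I on the page; a later call
 * to uvmpdpol_pagerealize() (made by UVM code outside this file) parks
 * the page on the local CPU's pdq; and the intent finally becomes a real
 * queue movement when uvmpdpol_flush() or uvmpdpol_idle() drains the pdq
 * and applies uvmpdpol_pagerealize_locked() under the global lock.  the
 * scan loops also realize intents directly when they encounter a page
 * with one pending.
 */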
785
786 /*
787 * uvmpdpol_idle: called from the system idle loop. periodically purge any
788 * pending updates back to the global queues.
789 */
790 void
791 uvmpdpol_idle(struct uvm_cpu *ucpu)
792 {
793 struct uvmpdpol_globalstate *s = &pdpol_state;
794 struct vm_page *pg;
795
796 KASSERT(kpreempt_disabled());
797
798 /*
799 * if no pages in the queue, we have nothing to do.
800 */
801 if (ucpu->pdqhead == ucpu->pdqtail) {
802 ucpu->pdqtime = getticks();
803 return;
804 }
805
806 /*
807 * don't do this more than ~8 times a second as it would needlessly
808 * exert pressure.
809 */
810 if (getticks() - ucpu->pdqtime < (hz >> 3)) {
811 return;
812 }
813
814 /*
815 * the idle LWP can't block, so we have to try for the lock. if we
816 * get it, purge the per-CPU pending update queue. continually
817 * check for a pending resched: in that case exit immediately.
818 */
819 if (mutex_tryenter(&s->lock)) {
820 while (ucpu->pdqhead != ucpu->pdqtail) {
821 pg = ucpu->pdq[ucpu->pdqhead];
822 if (!mutex_tryenter(&pg->interlock)) {
823 break;
824 }
825 ucpu->pdqhead++;
826 pg->pqflags &= ~PQ_INTENT_QUEUED;
827 (void)uvmpdpol_pagerealize_locked(pg);
828 mutex_exit(&pg->interlock);
829 if (curcpu()->ci_want_resched) {
830 break;
831 }
832 }
833 if (ucpu->pdqhead == ucpu->pdqtail) {
834 ucpu->pdqtime = getticks();
835 }
836 mutex_exit(&s->lock);
837 }
838 }

#if !defined(PDSIM)

#include <sys/sysctl.h>	/* XXX SYSCTL_DESCR */

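/*
 * uvmpdpol_sysctlsetup: export the tuning percentages as sysctl knobs
 * under the vm node (vm.anonmin, vm.filemin, vm.execmin, vm.anonmax,
 * vm.filemax, vm.execmax and vm.inactivepct).
 */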
void
uvmpdpol_sysctlsetup(void)
{
	struct uvmpdpol_globalstate *s = &pdpol_state;

	uvm_pctparam_createsysctlnode(&s->s_anonmin, "anonmin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for anonymous application data"));
	uvm_pctparam_createsysctlnode(&s->s_filemin, "filemin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for cached file data"));
	uvm_pctparam_createsysctlnode(&s->s_execmin, "execmin",
	    SYSCTL_DESCR("Percentage of physical memory reserved "
	    "for cached executable data"));

	uvm_pctparam_createsysctlnode(&s->s_anonmax, "anonmax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for "
	    "anonymous application data"));
	uvm_pctparam_createsysctlnode(&s->s_filemax, "filemax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for cached "
	    "file data"));
	uvm_pctparam_createsysctlnode(&s->s_execmax, "execmax",
	    SYSCTL_DESCR("Percentage of physical memory which will "
	    "be reclaimed from other usage for cached "
	    "executable data"));

	uvm_pctparam_createsysctlnode(&s->s_inactivepct, "inactivepct",
	    SYSCTL_DESCR("Size of the inactive queue as a percentage "
	    "of the entire (active + inactive) queue"));
}

#endif /* !defined(PDSIM) */

#if defined(PDSIM)
void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	/* XXX */
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */