FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_runq.c
1 /* $NetBSD: kern_runq.c,v 1.22.4.4 2010/01/16 17:39:01 bouyer Exp $ */
2
3 /*
4 * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: kern_runq.c,v 1.22.4.4 2010/01/16 17:39:01 bouyer Exp $");
31
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/bitops.h>
35 #include <sys/cpu.h>
36 #include <sys/idle.h>
37 #include <sys/intr.h>
38 #include <sys/kmem.h>
39 #include <sys/lwp.h>
40 #include <sys/mutex.h>
41 #include <sys/proc.h>
42 #include <sys/sched.h>
43 #include <sys/syscallargs.h>
44 #include <sys/sysctl.h>
45 #include <sys/systm.h>
46 #include <sys/types.h>
47 #include <sys/evcnt.h>
48
49 /*
   50  * Priority-related definitions.
51 */
52 #define PRI_TS_COUNT (NPRI_USER)
53 #define PRI_RT_COUNT (PRI_COUNT - PRI_TS_COUNT)
54 #define PRI_HTS_RANGE (PRI_TS_COUNT / 10)
55
56 #define PRI_HIGHEST_TS (MAXPRI_USER)
57
58 /*
59 * Bits per map.
60 */
61 #define BITMAP_BITS (32)
62 #define BITMAP_SHIFT (5)
63 #define BITMAP_MSB (0x80000000U)
64 #define BITMAP_MASK (BITMAP_BITS - 1)
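
       /*
        * Each priority maps to one bit in r_bitmap[]: priority p is kept in
        * word (p >> BITMAP_SHIFT) as bit (BITMAP_MSB >> (p & BITMAP_MASK)),
        * i.e. the bitmap is MSB-first within each 32-bit word.  For example,
        * priority 37 sets bit 0x80000000 >> 5 == 0x04000000 in r_bitmap[1]
        * (see sched_enqueue() and sched_dequeue() below).
        */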
65
66 /*
67 * Structures, runqueue.
68 */
69
70 const int schedppq = 1;
71
72 typedef struct {
73 TAILQ_HEAD(, lwp) q_head;
74 } queue_t;
75
76 typedef struct {
77 /* Bitmap */
78 uint32_t r_bitmap[PRI_COUNT >> BITMAP_SHIFT];
79 /* Counters */
80 u_int r_count; /* Count of the threads */
81 u_int r_avgcount; /* Average count of threads */
82 u_int r_mcount; /* Count of migratable threads */
83 /* Runqueues */
84 queue_t r_rt_queue[PRI_RT_COUNT];
85 queue_t r_ts_queue[PRI_TS_COUNT];
86 /* Event counters */
87 struct evcnt r_ev_pull;
88 struct evcnt r_ev_push;
89 struct evcnt r_ev_stay;
90 struct evcnt r_ev_localize;
91 } runqueue_t;
92
93 static void * sched_getrq(runqueue_t *, const pri_t);
94 #ifdef MULTIPROCESSOR
95 static lwp_t * sched_catchlwp(struct cpu_info *);
96 static void sched_balance(void *);
97 #endif
98
99 /*
100 * Preemption control.
101 */
102 int sched_upreempt_pri = PRI_KERNEL;
103 #if defined(__HAVE_PREEMPTION)
104 int sched_kpreempt_pri = PRI_USER_RT;
105 #else
106 int sched_kpreempt_pri = 1000;
107 #endif
108
109 /*
110 * Migration and balancing.
111 */
112 static u_int cacheht_time; /* Cache hotness time */
113 static u_int min_catch; /* Minimal LWP count for catching */
114 static u_int balance_period; /* Balance period */
115 static struct cpu_info *worker_ci; /* Victim CPU */
116 #ifdef MULTIPROCESSOR
117 static struct callout balance_ch; /* Callout of balancer */
118 #endif
119
120 void
121 runq_init(void)
122 {
123
124 /* Balancing */
125 worker_ci = curcpu();
126 cacheht_time = mstohz(3); /* ~3 ms */
127 balance_period = mstohz(300); /* ~300 ms */
128
129 /* Minimal count of LWPs for catching */
130 min_catch = 1;
131
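        /*
         * The defaults above (cacheht_time, balance_period, min_catch) can
         * be changed at run time through the kern.sched sysctl nodes created
         * at the bottom of this file.
         */
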
132 /* Initialize balancing callout and run it */
133 #ifdef MULTIPROCESSOR
134 callout_init(&balance_ch, CALLOUT_MPSAFE);
135 callout_setfunc(&balance_ch, sched_balance, NULL);
136 callout_schedule(&balance_ch, balance_period);
137 #endif
138 }
139
140 void
141 sched_cpuattach(struct cpu_info *ci)
142 {
143 runqueue_t *ci_rq;
144 void *rq_ptr;
145 u_int i, size;
146 char *cpuname;
147
148 if (ci->ci_schedstate.spc_lwplock == NULL) {
149 ci->ci_schedstate.spc_lwplock =
150 mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
151 }
152 if (ci == lwp0.l_cpu) {
153 /* Initialize the scheduler structure of the primary LWP */
154 lwp0.l_mutex = ci->ci_schedstate.spc_lwplock;
155 }
156 if (ci->ci_schedstate.spc_mutex != NULL) {
157 /* Already initialized. */
158 return;
159 }
160
161 /* Allocate the run queue */
162 size = roundup2(sizeof(runqueue_t), coherency_unit) + coherency_unit;
163 rq_ptr = kmem_zalloc(size, KM_SLEEP);
164 if (rq_ptr == NULL) {
165 panic("sched_cpuattach: could not allocate the runqueue");
166 }
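        /*
         * Align the run queue structure to coherency_unit; the extra
         * coherency_unit allocated above leaves room for this adjustment.
         * Presumably this keeps each CPU's runqueue_t on its own cache
         * line(s) and avoids false sharing between CPUs.
         */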
167 ci_rq = (void *)(roundup2((uintptr_t)(rq_ptr), coherency_unit));
168
169 /* Initialize run queues */
170 ci->ci_schedstate.spc_mutex =
171 mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
172 for (i = 0; i < PRI_RT_COUNT; i++)
173 TAILQ_INIT(&ci_rq->r_rt_queue[i].q_head);
174 for (i = 0; i < PRI_TS_COUNT; i++)
175 TAILQ_INIT(&ci_rq->r_ts_queue[i].q_head);
176
177 ci->ci_schedstate.spc_sched_info = ci_rq;
178
179 cpuname = kmem_alloc(8, KM_SLEEP);
180 snprintf(cpuname, 8, "cpu%d", cpu_index(ci));
181
182 evcnt_attach_dynamic(&ci_rq->r_ev_pull, EVCNT_TYPE_MISC, NULL,
183 cpuname, "runqueue pull");
184 evcnt_attach_dynamic(&ci_rq->r_ev_push, EVCNT_TYPE_MISC, NULL,
185 cpuname, "runqueue push");
186 evcnt_attach_dynamic(&ci_rq->r_ev_stay, EVCNT_TYPE_MISC, NULL,
187 cpuname, "runqueue stay");
188 evcnt_attach_dynamic(&ci_rq->r_ev_localize, EVCNT_TYPE_MISC, NULL,
189 cpuname, "runqueue localize");
190 }
191
192 /*
193 * Control of the runqueue.
194 */
195
196 static inline void *
197 sched_getrq(runqueue_t *ci_rq, const pri_t prio)
198 {
199
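        /*
         * Time-sharing priorities (0 .. PRI_HIGHEST_TS) index r_ts_queue
         * directly; anything above that indexes r_rt_queue, offset by
         * PRI_HIGHEST_TS + 1.
         */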
200 KASSERT(prio < PRI_COUNT);
201 return (prio <= PRI_HIGHEST_TS) ?
202 &ci_rq->r_ts_queue[prio].q_head :
203 &ci_rq->r_rt_queue[prio - PRI_HIGHEST_TS - 1].q_head;
204 }
205
206 void
207 sched_enqueue(struct lwp *l, bool swtch)
208 {
209 runqueue_t *ci_rq;
210 struct schedstate_percpu *spc;
211 TAILQ_HEAD(, lwp) *q_head;
212 const pri_t eprio = lwp_eprio(l);
213 struct cpu_info *ci;
214 int type;
215
216 ci = l->l_cpu;
217 spc = &ci->ci_schedstate;
218 ci_rq = spc->spc_sched_info;
219 KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
220
221 /* Update the last run time on switch */
222 if (__predict_true(swtch == true))
223 l->l_rticksum += (hardclock_ticks - l->l_rticks);
224 else if (l->l_rticks == 0)
225 l->l_rticks = hardclock_ticks;
226
227 /* Enqueue the thread */
228 q_head = sched_getrq(ci_rq, eprio);
229 if (TAILQ_EMPTY(q_head)) {
230 u_int i;
231 uint32_t q;
232
233 /* Mark bit */
234 i = eprio >> BITMAP_SHIFT;
235 q = BITMAP_MSB >> (eprio & BITMAP_MASK);
236 KASSERT((ci_rq->r_bitmap[i] & q) == 0);
237 ci_rq->r_bitmap[i] |= q;
238 }
239 TAILQ_INSERT_TAIL(q_head, l, l_runq);
240 ci_rq->r_count++;
241 if ((l->l_pflag & LP_BOUND) == 0)
242 ci_rq->r_mcount++;
243
244 /*
  245          * Update the runqueue's highest priority value if the
  246          * priority of this thread is higher.
247 */
248 if (eprio > spc->spc_maxpriority)
249 spc->spc_maxpriority = eprio;
250
251 sched_newts(l);
252
253 /*
254 * Wake the chosen CPU or cause a preemption if the newly
255 * enqueued thread has higher priority. Don't cause a
256 * preemption if the thread is yielding (swtch).
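          *
          * An eprio at or above sched_kpreempt_pri requests kernel
          * preemption, at or above sched_upreempt_pri an immediate user
          * resched; anything lower is only noted lazily.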
257 */
258 if (!swtch && eprio > spc->spc_curpriority) {
259 if (eprio >= sched_kpreempt_pri)
260 type = RESCHED_KPREEMPT;
261 else if (eprio >= sched_upreempt_pri)
262 type = RESCHED_IMMED;
263 else
264 type = RESCHED_LAZY;
265 cpu_need_resched(ci, type);
266 }
267 }
268
269 void
270 sched_dequeue(struct lwp *l)
271 {
272 runqueue_t *ci_rq;
273 TAILQ_HEAD(, lwp) *q_head;
274 struct schedstate_percpu *spc;
275 const pri_t eprio = lwp_eprio(l);
276
277 spc = & l->l_cpu->ci_schedstate;
278 ci_rq = spc->spc_sched_info;
279 KASSERT(lwp_locked(l, spc->spc_mutex));
280
281 KASSERT(eprio <= spc->spc_maxpriority);
282 KASSERT(ci_rq->r_bitmap[eprio >> BITMAP_SHIFT] != 0);
283 KASSERT(ci_rq->r_count > 0);
284
285 if (spc->spc_migrating == l)
286 spc->spc_migrating = NULL;
287
288 ci_rq->r_count--;
289 if ((l->l_pflag & LP_BOUND) == 0)
290 ci_rq->r_mcount--;
291
292 q_head = sched_getrq(ci_rq, eprio);
293 TAILQ_REMOVE(q_head, l, l_runq);
294 if (TAILQ_EMPTY(q_head)) {
295 u_int i;
296 uint32_t q;
297
298 /* Unmark bit */
299 i = eprio >> BITMAP_SHIFT;
300 q = BITMAP_MSB >> (eprio & BITMAP_MASK);
301 KASSERT((ci_rq->r_bitmap[i] & q) != 0);
302 ci_rq->r_bitmap[i] &= ~q;
303
304 /*
  305                  * Update the runqueue's highest priority value, in case
  306                  * this was the last thread in the highest-priority queue.
307 */
308 if (eprio != spc->spc_maxpriority)
309 return;
310
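                /*
                 * Scan the bitmap downwards for the new maximum.  Since the
                 * bitmap is MSB-first, ffs() yields the highest priority set
                 * in a word: e.g. r_bitmap[1] == 0x04000000 gives ffs() == 27,
                 * so the new maximum is (1 << 5) + (32 - 27) == 37.
                 */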
311 do {
312 if (ci_rq->r_bitmap[i] != 0) {
313 q = ffs(ci_rq->r_bitmap[i]);
314 spc->spc_maxpriority =
315 (i << BITMAP_SHIFT) + (BITMAP_BITS - q);
316 return;
317 }
318 } while (i--);
319
  320                 /* If nothing was found, set the lowest value */
321 spc->spc_maxpriority = 0;
322 }
323 }
324
325 /*
326 * Migration and balancing.
327 */
328
329 #ifdef MULTIPROCESSOR
330
  331 /* Estimate whether the LWP is cache-hot */
332 static inline bool
333 lwp_cache_hot(const struct lwp *l)
334 {
335
336 if (__predict_false(l->l_slptime || l->l_rticks == 0))
337 return false;
338
339 return (hardclock_ticks - l->l_rticks <= cacheht_time);
340 }
341
342 /* Check if LWP can migrate to the chosen CPU */
343 static inline bool
344 sched_migratable(const struct lwp *l, struct cpu_info *ci)
345 {
346 const struct schedstate_percpu *spc = &ci->ci_schedstate;
347 KASSERT(lwp_locked(__UNCONST(l), NULL));
348
349 /* CPU is offline */
350 if (__predict_false(spc->spc_flags & SPCF_OFFLINE))
351 return false;
352
353 /* Affinity bind */
354 if (__predict_false(l->l_flag & LW_AFFINITY))
355 return kcpuset_isset(cpu_index(ci), l->l_affinity);
356
357 /* Processor-set */
358 return (spc->spc_psid == l->l_psid);
359 }
360
361 /*
  362  * Estimate whether the LWP should migrate to another CPU.
  363  * Pick and return that CPU if migration is needed.
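       *
       * The choice is made roughly in this order: stay on the current CPU if
       * the LWP is bound, if that CPU's run queue is empty, or if the LWP is
       * still cache-hot and at least as important as what the CPU is running;
       * move to the CPU executing this code if the LWP beats its current
       * priority; otherwise scan all CPUs and pick the migratable target with
       * the lowest priority, breaking ties by the shorter run queue.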
364 */
365 struct cpu_info *
366 sched_takecpu(struct lwp *l)
367 {
368 struct cpu_info *ci, *tci, *first, *next;
369 struct schedstate_percpu *spc;
370 runqueue_t *ci_rq, *ici_rq;
371 pri_t eprio, lpri, pri;
372
373 KASSERT(lwp_locked(l, NULL));
374
  375         /* If the thread is strictly bound, do not consider other CPUs */
376 ci = l->l_cpu;
377 if (l->l_pflag & LP_BOUND)
378 return ci;
379
380 spc = &ci->ci_schedstate;
381 ci_rq = spc->spc_sched_info;
382
383 /* Make sure that thread is in appropriate processor-set */
384 if (__predict_true(spc->spc_psid == l->l_psid)) {
  385                 /* If this thread's CPU has an empty run queue, run there */
386 if (ci_rq->r_count == 0) {
387 ci_rq->r_ev_stay.ev_count++;
388 return ci;
389 }
390 /* Stay if thread is cache-hot */
391 eprio = lwp_eprio(l);
392 if (__predict_true(l->l_stat != LSIDL) &&
393 lwp_cache_hot(l) && eprio >= spc->spc_curpriority) {
394 ci_rq->r_ev_stay.ev_count++;
395 return ci;
396 }
397 } else {
398 eprio = lwp_eprio(l);
399 }
400
  401         /* Run on the current CPU if the thread's priority is higher */
402 ci = curcpu();
403 spc = &ci->ci_schedstate;
404 if (eprio > spc->spc_curpriority && sched_migratable(l, ci)) {
405 ci_rq = spc->spc_sched_info;
406 ci_rq->r_ev_localize.ev_count++;
407 return ci;
408 }
409
410 /*
  411          * Look for the CPU with the lowest priority thread.  In case of
  412          * equal priority, choose the CPU with the fewest threads.
413 */
414 first = l->l_cpu;
415 ci = first;
416 tci = first;
417 lpri = PRI_COUNT;
418 do {
419 next = CIRCLEQ_LOOP_NEXT(&cpu_queue, ci, ci_data.cpu_qchain);
420 spc = &ci->ci_schedstate;
421 ici_rq = spc->spc_sched_info;
422 pri = max(spc->spc_curpriority, spc->spc_maxpriority);
423 if (pri > lpri)
424 continue;
425
426 if (pri == lpri && ci_rq->r_count < ici_rq->r_count)
427 continue;
428
429 if (!sched_migratable(l, ci))
430 continue;
431
432 lpri = pri;
433 tci = ci;
434 ci_rq = ici_rq;
435 } while (ci = next, ci != first);
436
437 ci_rq = tci->ci_schedstate.spc_sched_info;
438 ci_rq->r_ev_push.ev_count++;
439
440 return tci;
441 }
442
443 /*
  444  * Tries to catch an LWP from another CPU's run queue.
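       *
       * Stealing only happens when the victim CPU has at least min_catch
       * migratable LWPs queued.  Bound, cache-hot and otherwise non-migratable
       * LWPs are skipped, and if the chosen LWP is still context switching we
       * spin-wait for it before moving it to the local run queue.  Callers
       * appear to hold both CPUs' run-queue locks when calling this (see
       * sched_idle() and sched_nextlwp()).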
445 */
446 static struct lwp *
447 sched_catchlwp(struct cpu_info *ci)
448 {
449 struct cpu_info *curci = curcpu();
450 struct schedstate_percpu *spc, *curspc;
451 TAILQ_HEAD(, lwp) *q_head;
452 runqueue_t *ci_rq;
453 struct lwp *l;
454
455 curspc = &curci->ci_schedstate;
456 spc = &ci->ci_schedstate;
457 KASSERT(curspc->spc_psid == spc->spc_psid);
458
459 ci_rq = spc->spc_sched_info;
460 if (ci_rq->r_mcount < min_catch) {
461 spc_unlock(ci);
462 return NULL;
463 }
464
465 /* Take the highest priority thread */
466 q_head = sched_getrq(ci_rq, spc->spc_maxpriority);
467 l = TAILQ_FIRST(q_head);
468
469 for (;;) {
  470                 /* Check the first thread, then each following one in the queue */
471 if (l == NULL)
472 break;
473 KASSERT(l->l_stat == LSRUN);
474 KASSERT(l->l_flag & LW_INMEM);
475
  476                 /* Look for threads that are allowed to migrate */
477 if ((l->l_pflag & LP_BOUND) || lwp_cache_hot(l) ||
478 !sched_migratable(l, curci)) {
479 l = TAILQ_NEXT(l, l_runq);
480 continue;
481 }
482
483 /* Grab the thread, and move to the local run queue */
484 sched_dequeue(l);
485
486 /*
487 * If LWP is still context switching, we may need to
488 * spin-wait before changing its CPU.
489 */
490 if (__predict_false(l->l_ctxswtch != 0)) {
491 u_int count;
492 count = SPINLOCK_BACKOFF_MIN;
493 while (l->l_ctxswtch)
494 SPINLOCK_BACKOFF(count);
495 }
496 l->l_cpu = curci;
497 ci_rq->r_ev_pull.ev_count++;
498 lwp_unlock_to(l, curspc->spc_mutex);
499 sched_enqueue(l, false);
500 return l;
501 }
502 spc_unlock(ci);
503
504 return l;
505 }
506
507 /*
  508  * Periodic calculations for balancing.
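       *
       * Each CPU's r_avgcount is kept as a decaying average of its migratable
       * LWP count ((old + r_mcount) / 2), computed without taking the
       * run-queue locks.  The CPU with the highest average becomes worker_ci,
       * the preferred victim for idle CPUs to steal from.  When run from the
       * callout (nocallout == NULL) the balancer reschedules itself every
       * balance_period.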
509 */
510 static void
511 sched_balance(void *nocallout)
512 {
513 struct cpu_info *ci, *hci;
514 runqueue_t *ci_rq;
515 CPU_INFO_ITERATOR cii;
516 u_int highest;
517
518 hci = curcpu();
519 highest = 0;
520
  521         /* Do the counting locklessly */
522 for (CPU_INFO_FOREACH(cii, ci)) {
523 ci_rq = ci->ci_schedstate.spc_sched_info;
524
525 /* Average count of the threads */
526 ci_rq->r_avgcount = (ci_rq->r_avgcount + ci_rq->r_mcount) >> 1;
527
528 /* Look for CPU with the highest average */
529 if (ci_rq->r_avgcount > highest) {
530 hci = ci;
531 highest = ci_rq->r_avgcount;
532 }
533 }
534
535 /* Update the worker */
536 worker_ci = hci;
537
538 if (nocallout == NULL)
539 callout_schedule(&balance_ch, balance_period);
540 }
541
542 /*
543 * Called from each CPU's idle loop.
544 */
545 void
546 sched_idle(void)
547 {
548 struct cpu_info *ci = curcpu(), *tci = NULL;
549 struct schedstate_percpu *spc, *tspc;
550 runqueue_t *ci_rq;
551 bool dlock = false;
552
553 /* Check if there is a migrating LWP */
554 spc = &ci->ci_schedstate;
555 if (spc->spc_migrating == NULL)
556 goto no_migration;
557
558 spc_lock(ci);
559 for (;;) {
560 struct lwp *l;
561
562 l = spc->spc_migrating;
563 if (l == NULL)
564 break;
565
566 /*
  567                  * If this is a second attempt and the target CPU has
  568                  * changed, drop the old lock.
569 */
570 if (dlock == true && tci != l->l_target_cpu) {
571 KASSERT(tci != NULL);
572 spc_unlock(tci);
573 dlock = false;
574 }
575
576 /*
  577                  * Nothing to do if the destination has changed to the
  578                  * local CPU, or if the migration was done by another CPU.
579 */
580 tci = l->l_target_cpu;
581 if (tci == NULL || tci == ci) {
582 spc->spc_migrating = NULL;
583 l->l_target_cpu = NULL;
584 break;
585 }
586 tspc = &tci->ci_schedstate;
587
588 /*
589 * Double-lock the runqueues.
590 * We do that only once.
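                         *
                         * Locks are taken in cpu_info address order: if the
                         * local CPU is the lower one its lock is already held
                         * and the target's can be taken directly; otherwise
                         * try-lock, and on failure drop our lock, relock both
                         * in order and re-check the migration state.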
591 */
592 if (dlock == false) {
593 dlock = true;
594 if (ci < tci) {
595 spc_lock(tci);
596 } else if (!mutex_tryenter(tspc->spc_mutex)) {
597 spc_unlock(ci);
598 spc_lock(tci);
599 spc_lock(ci);
  600                                 /* Check the situation again. */
601 continue;
602 }
603 }
604
605 /* Migrate the thread */
606 KASSERT(l->l_stat == LSRUN);
607 spc->spc_migrating = NULL;
608 l->l_target_cpu = NULL;
609 sched_dequeue(l);
610 l->l_cpu = tci;
611 lwp_setlock(l, tspc->spc_mutex);
612 sched_enqueue(l, false);
613 break;
614 }
615 if (dlock == true) {
616 KASSERT(tci != NULL);
617 spc_unlock(tci);
618 }
619 spc_unlock(ci);
620
621 no_migration:
622 ci_rq = spc->spc_sched_info;
623 if ((spc->spc_flags & SPCF_OFFLINE) != 0 || ci_rq->r_count != 0) {
624 return;
625 }
626
627 /* Reset the counter, and call the balancer */
628 ci_rq->r_avgcount = 0;
629 sched_balance(ci);
630 tci = worker_ci;
631 tspc = &tci->ci_schedstate;
632 if (ci == tci || spc->spc_psid != tspc->spc_psid)
633 return;
634 spc_dlock(ci, tci);
635 (void)sched_catchlwp(tci);
636 spc_unlock(ci);
637 }
638
639 #else
640
641 struct cpu_info *
642 sched_takecpu(struct lwp *l)
643 {
644
645 return l->l_cpu;
646 }
647
648 void
649 sched_idle(void)
650 {
651
652 }
653 #endif /* MULTIPROCESSOR */
654
655 /*
656 * Scheduling statistics and balancing.
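       *
       * sched_lwp_stats() is presumably run periodically for every LWP (the
       * sched_pstats_hook() call below suggests the pstats path): it
       * accumulates sleep time, classifies the LWP as batch (CPU-bound) or
       * interactive, may pick a migration target for a CPU hog via
       * sched_takecpu(), and finally resets the per-interval tick sums.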
657 */
658 void
659 sched_lwp_stats(struct lwp *l)
660 {
661 int batch;
662
663 KASSERT(lwp_locked(l, NULL));
664
665 /* Update sleep time */
666 if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
667 l->l_stat == LSSUSPENDED)
668 l->l_slptime++;
669
670 /*
  671          * Mark the thread as CPU-bound when its run time exceeds its sleep
  672          * time; treat it as batch only if it was already marked CPU-bound.
673 */
674 batch = (l->l_rticksum > l->l_slpticksum);
675 if (batch != 0) {
676 if ((l->l_flag & LW_BATCH) == 0)
677 batch = 0;
678 l->l_flag |= LW_BATCH;
679 } else
680 l->l_flag &= ~LW_BATCH;
681
682 /*
  683          * If the thread is CPU-bound and never sleeps, it would monopolize the
  684          * CPU.  In that case reset the last-sleep counter and check it later:
  685          * if it is still zero, set up a migration and clear the batch flag.
686 */
687 if (batch && (l->l_slptime + l->l_slpticksum) == 0) {
688 if (l->l_slpticks == 0) {
689 if (l->l_target_cpu == NULL &&
690 (l->l_stat == LSRUN || l->l_stat == LSONPROC)) {
691 struct cpu_info *ci = sched_takecpu(l);
692 l->l_target_cpu = (ci != l->l_cpu) ? ci : NULL;
693 }
694 l->l_flag &= ~LW_BATCH;
695 } else {
696 l->l_slpticks = 0;
697 }
698 }
699
700 /* Reset the time sums */
701 l->l_slpticksum = 0;
702 l->l_rticksum = 0;
703
704 /* Scheduler-specific hook */
705 sched_pstats_hook(l, batch);
706 }
707
708 /*
709 * Scheduler mill.
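       *
       * sched_nextlwp() picks the LWP to run next on the current CPU.  It
       * returns NULL (so the idle LWP runs) while a migration is pending or
       * when the run queue is empty; on MULTIPROCESSOR kernels an empty queue
       * first triggers a balance pass and an attempt to steal from worker_ci.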
710 */
711 struct lwp *
712 sched_nextlwp(void)
713 {
714 struct cpu_info *ci = curcpu();
715 struct schedstate_percpu *spc;
716 TAILQ_HEAD(, lwp) *q_head;
717 runqueue_t *ci_rq;
718 struct lwp *l;
719
720 /* Return to idle LWP if there is a migrating thread */
721 spc = &ci->ci_schedstate;
722 if (__predict_false(spc->spc_migrating != NULL))
723 return NULL;
724 ci_rq = spc->spc_sched_info;
725
726 #ifdef MULTIPROCESSOR
  727         /* If the run queue is empty, try to catch a thread from another CPU */
728 if (__predict_false(ci_rq->r_count == 0)) {
729 struct schedstate_percpu *cspc;
730 struct cpu_info *cci;
731
732 /* Offline CPUs should not perform this, however */
733 if (__predict_false(spc->spc_flags & SPCF_OFFLINE))
734 return NULL;
735
736 /* Reset the counter, and call the balancer */
737 ci_rq->r_avgcount = 0;
738 sched_balance(ci);
739 cci = worker_ci;
740 cspc = &cci->ci_schedstate;
741 if (ci == cci || spc->spc_psid != cspc->spc_psid ||
742 !mutex_tryenter(cci->ci_schedstate.spc_mutex))
743 return NULL;
744 return sched_catchlwp(cci);
745 }
746 #else
747 if (__predict_false(ci_rq->r_count == 0))
748 return NULL;
749 #endif
750
751 /* Take the highest priority thread */
752 KASSERT(ci_rq->r_bitmap[spc->spc_maxpriority >> BITMAP_SHIFT]);
753 q_head = sched_getrq(ci_rq, spc->spc_maxpriority);
754 l = TAILQ_FIRST(q_head);
755 KASSERT(l != NULL);
756
757 sched_oncpu(l);
758 l->l_rticks = hardclock_ticks;
759
760 return l;
761 }
762
763 bool
764 sched_curcpu_runnable_p(void)
765 {
766 const struct cpu_info *ci;
767 const struct schedstate_percpu *spc;
768 const runqueue_t *ci_rq;
769 bool rv;
770
771 kpreempt_disable();
772 ci = curcpu();
773 spc = &ci->ci_schedstate;
774 ci_rq = spc->spc_sched_info;
775
776 #ifndef __HAVE_FAST_SOFTINTS
777 if (ci->ci_data.cpu_softints) {
778 kpreempt_enable();
779 return true;
780 }
781 #endif
782
783 rv = (ci_rq->r_count != 0) ? true : false;
784 kpreempt_enable();
785
786 return rv;
787 }
788
789 /*
790 * Sysctl nodes and initialization.
791 */
792
793 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
794 {
795 const struct sysctlnode *node = NULL;
796
797 sysctl_createv(clog, 0, NULL, NULL,
798 CTLFLAG_PERMANENT,
799 CTLTYPE_NODE, "kern", NULL,
800 NULL, 0, NULL, 0,
801 CTL_KERN, CTL_EOL);
802 sysctl_createv(clog, 0, NULL, &node,
803 CTLFLAG_PERMANENT,
804 CTLTYPE_NODE, "sched",
805 SYSCTL_DESCR("Scheduler options"),
806 NULL, 0, NULL, 0,
807 CTL_KERN, CTL_CREATE, CTL_EOL);
808
809 if (node == NULL)
810 return;
811
812 sysctl_createv(clog, 0, &node, NULL,
813 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
814 CTLTYPE_INT, "cacheht_time",
815 SYSCTL_DESCR("Cache hotness time (in ticks)"),
816 NULL, 0, &cacheht_time, 0,
817 CTL_CREATE, CTL_EOL);
818 sysctl_createv(clog, 0, &node, NULL,
819 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
820 CTLTYPE_INT, "balance_period",
821 SYSCTL_DESCR("Balance period (in ticks)"),
822 NULL, 0, &balance_period, 0,
823 CTL_CREATE, CTL_EOL);
824 sysctl_createv(clog, 0, &node, NULL,
825 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
826 CTLTYPE_INT, "min_catch",
827 SYSCTL_DESCR("Minimal count of threads for catching"),
828 NULL, 0, &min_catch, 0,
829 CTL_CREATE, CTL_EOL);
830 sysctl_createv(clog, 0, &node, NULL,
831 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
832 CTLTYPE_INT, "timesoftints",
833 SYSCTL_DESCR("Track CPU time for soft interrupts"),
834 NULL, 0, &softint_timing, 0,
835 CTL_CREATE, CTL_EOL);
836 sysctl_createv(clog, 0, &node, NULL,
837 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
838 CTLTYPE_INT, "kpreempt_pri",
839 SYSCTL_DESCR("Minimum priority to trigger kernel preemption"),
840 NULL, 0, &sched_kpreempt_pri, 0,
841 CTL_CREATE, CTL_EOL);
842 sysctl_createv(clog, 0, &node, NULL,
843 CTLFLAG_PERMANENT | CTLFLAG_READWRITE,
844 CTLTYPE_INT, "upreempt_pri",
845 SYSCTL_DESCR("Minimum priority to trigger user preemption"),
846 NULL, 0, &sched_upreempt_pri, 0,
847 CTL_CREATE, CTL_EOL);
848 }
849
850 /*
851 * Debugging.
852 */
853
854 #ifdef DDB
855
856 void
857 sched_print_runqueue(void (*pr)(const char *, ...)
858 __attribute__((__format__(__printf__,1,2))))
859 {
860 runqueue_t *ci_rq;
861 struct cpu_info *ci, *tci;
862 struct schedstate_percpu *spc;
863 struct lwp *l;
864 struct proc *p;
865 CPU_INFO_ITERATOR cii;
866
867 for (CPU_INFO_FOREACH(cii, ci)) {
868 int i;
869
870 spc = &ci->ci_schedstate;
871 ci_rq = spc->spc_sched_info;
872
873 (*pr)("Run-queue (CPU = %u):\n", ci->ci_index);
874 (*pr)(" pid.lid = %d.%d, r_count = %u, r_avgcount = %u, "
875 "maxpri = %d, mlwp = %p\n",
876 #ifdef MULTIPROCESSOR
877 ci->ci_curlwp->l_proc->p_pid, ci->ci_curlwp->l_lid,
878 #else
879 curlwp->l_proc->p_pid, curlwp->l_lid,
880 #endif
881 ci_rq->r_count, ci_rq->r_avgcount, spc->spc_maxpriority,
882 spc->spc_migrating);
883 i = (PRI_COUNT >> BITMAP_SHIFT) - 1;
884 do {
885 uint32_t q;
886 q = ci_rq->r_bitmap[i];
887 (*pr)(" bitmap[%d] => [ %d (0x%x) ]\n", i, ffs(q), q);
888 } while (i--);
889 }
890
891 (*pr)(" %5s %4s %4s %10s %3s %18s %4s %4s %s\n",
892 "LID", "PRI", "EPRI", "FL", "ST", "LWP", "CPU", "TCI", "LRTICKS");
893
894 PROCLIST_FOREACH(p, &allproc) {
895 if ((p->p_flag & PK_MARKER) != 0)
896 continue;
897 (*pr)(" /- %d (%s)\n", (int)p->p_pid, p->p_comm);
898 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
899 ci = l->l_cpu;
900 tci = l->l_target_cpu;
901 (*pr)(" | %5d %4u %4u 0x%8.8x %3s %18p %4u %4d %u\n",
902 (int)l->l_lid, l->l_priority, lwp_eprio(l),
903 l->l_flag, l->l_stat == LSRUN ? "RQ" :
904 (l->l_stat == LSSLEEP ? "SQ" : "-"),
905 l, ci->ci_index, (tci ? tci->ci_index : -1),
906 (u_int)(hardclock_ticks - l->l_rticks));
907 }
908 }
909 }
910
911 #endif