FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_switch.c


/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>
/* Change the 0 below to 1 to enable logging of critical_enter/exit. */
#if 0
#define KTR_CRITICAL    KTR_SCHED
#else
#define KTR_CRITICAL    0
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

/*
 * kern.sched.preemption allows user space to determine if preemption support
 * is compiled in or not.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");
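
#if 0   /* Editorial example; not part of kern_switch.c. */
/*
 * A minimal userland sketch of how the kern.sched.preemption OID above is
 * meant to be consumed; build it as a standalone program, not in the
 * kernel.  Only sysctlbyname(3) and the OID name are taken from this
 * file; the rest is hypothetical example code.
 */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdio.h>

int
main(void)
{
        int preemption;
        size_t len = sizeof(preemption);

        if (sysctlbyname("kern.sched.preemption", &preemption, &len,
            NULL, 0) == -1) {
                perror("sysctlbyname");
                return (1);
        }
        printf("kernel preemption compiled in: %d\n", preemption);
        return (0);
}
#endif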

/*
 * Support for scheduler stats exported via kern.sched.stats.  All stats may
 * be reset with kern.sched.stats.reset = 1.  Stats may be defined elsewhere
 * with SCHED_STAT_DEFINE().
 */
#ifdef SCHED_STATS
SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "switch stats");

/* Switch reasons from mi_switch(). */
DPCPU_DEFINE(long, sched_switch_stats[SWT_COUNT]);
SCHED_STAT_DEFINE_VAR(uncategorized,
    &DPCPU_NAME(sched_switch_stats[SWT_NONE]), "");
SCHED_STAT_DEFINE_VAR(preempt,
    &DPCPU_NAME(sched_switch_stats[SWT_PREEMPT]), "");
SCHED_STAT_DEFINE_VAR(owepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_OWEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(turnstile,
    &DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), "");
SCHED_STAT_DEFINE_VAR(sleepq,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), "");
SCHED_STAT_DEFINE_VAR(sleepqtimo,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), "");
SCHED_STAT_DEFINE_VAR(relinquish,
    &DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), "");
SCHED_STAT_DEFINE_VAR(needresched,
    &DPCPU_NAME(sched_switch_stats[SWT_NEEDRESCHED]), "");
SCHED_STAT_DEFINE_VAR(idle,
    &DPCPU_NAME(sched_switch_stats[SWT_IDLE]), "");
SCHED_STAT_DEFINE_VAR(iwait,
    &DPCPU_NAME(sched_switch_stats[SWT_IWAIT]), "");
SCHED_STAT_DEFINE_VAR(suspend,
    &DPCPU_NAME(sched_switch_stats[SWT_SUSPEND]), "");
SCHED_STAT_DEFINE_VAR(remotepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(remotewakeidle,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEWAKEIDLE]), "");

static int
sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
{
        struct sysctl_oid *p;
        uintptr_t counter;
        int error;
        int val;
        int i;

        val = 0;
        error = sysctl_handle_int(oidp, &val, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        if (val == 0)
                return (0);
        /*
         * Traverse the list of children of _kern_sched_stats and reset each
         * to 0.  Skip the reset entry.
         */
        SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
                if (p == oidp || p->oid_arg1 == NULL)
                        continue;
                counter = (uintptr_t)p->oid_arg1;
                CPU_FOREACH(i) {
                        *(long *)(dpcpu_off[i] + counter) = 0;
                }
        }
        return (0);
}

SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_NEEDGIANT, NULL, 0,
    sysctl_stats_reset, "I",
    "Reset scheduler statistics");
#endif
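
#if 0   /* Editorial example; not part of kern_switch.c. */
/*
 * A hedged sketch of how a scheduler adds its own statistic: define a
 * per-CPU counter with SCHED_STAT_DEFINE() and bump it at the point of
 * interest with SCHED_STAT_INC() (both from <sys/sched.h>, active only
 * under SCHED_STATS).  The counter name, description, and call site
 * below are hypothetical.
 */
SCHED_STAT_DEFINE(example_migrations, "Example: cross-CPU migrations");

static void
example_note_migration(void)
{

        SCHED_STAT_INC(example_migrations);
}
#endif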

/************************************************************************
 * Functions that manipulate runnability from a thread perspective.     *
 ************************************************************************/
/*
 * Select the thread that will be run next.
 */

static __noinline struct thread *
choosethread_panic(struct thread *td)
{

        /*
         * If we are in panic, only allow system threads,
         * plus the one we are running in, to be run.
         */
retry:
        if (((td->td_proc->p_flag & P_SYSTEM) == 0 &&
            (td->td_flags & TDF_INPANIC) == 0)) {
                /* note that it is no longer on the run queue */
                TD_SET_CAN_RUN(td);
                td = sched_choose();
                goto retry;
        }

        TD_SET_RUNNING(td);
        return (td);
}

struct thread *
choosethread(void)
{
        struct thread *td;

        td = sched_choose();

        if (KERNEL_PANICKED())
                return (choosethread_panic(td));

        TD_SET_RUNNING(td);
        return (td);
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemption is not allowed.
 *
 * It might seem a good idea to inline critical_enter() but, in order
 * to prevent instruction reordering by the compiler, a __compiler_membar()
 * would have to be used here (the same as in sched_pin()).  The performance
 * penalty imposed by the membar could then, in most cases, produce slower
 * code than the function call itself.
 */
void
critical_enter_KBI(void)
{
#ifdef KTR
        struct thread *td = curthread;
#endif
        critical_enter();
        CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}
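
#if 0   /* Editorial example; not part of kern_switch.c. */
/*
 * A minimal sketch of the usual critical section pattern: keep the
 * current thread running on this CPU, unpreempted, while it touches
 * per-CPU data.  The DPCPU counter and function below are hypothetical;
 * only critical_enter()/critical_exit() and the DPCPU_* accessors from
 * <sys/pcpu.h> are assumed.
 */
DPCPU_DEFINE(long, example_events);

static void
example_count_event(void)
{

        critical_enter();       /* td_critnest++; no preemption from here */
        DPCPU_SET(example_events, DPCPU_GET(example_events) + 1);
        critical_exit();        /* may run a deferred switch if one is owed */
}
#endif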

void __noinline
critical_exit_preempt(void)
{
        struct thread *td;
        int flags;

        /*
         * If td_critnest is 0, it is possible that we are going to get
         * preempted again before reaching the code below. This happens
         * rarely and is harmless. However, this means td_owepreempt may
         * now be unset.
         */
        td = curthread;
        if (td->td_critnest != 0)
                return;
        if (kdb_active)
                return;

        /*
         * Microoptimization: we have committed to switching, so disable
         * preemption in interrupt handlers while spinning for the
         * thread lock.
         */
        td->td_critnest = 1;
        thread_lock(td);
        td->td_critnest--;
        flags = SW_INVOL | SW_PREEMPT;
        if (TD_IS_IDLETHREAD(td))
                flags |= SWT_IDLE;
        else
                flags |= SWT_OWEPREEMPT;
        mi_switch(flags);
}

void
critical_exit_KBI(void)
{
#ifdef KTR
        struct thread *td = curthread;
#endif
        critical_exit();
        CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests                             *
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
        int i;

        bzero(rq, sizeof *rq);
        for (i = 0; i < RQ_NQS; i++)
                TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
        struct rqbits *rqb;
        int pri;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
                        CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
                            rqb->rqb_bits[i], i, pri);
                        return (pri);
                }

        return (-1);
}
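
#if 0   /* Editorial example; not part of kern_switch.c. */
/*
 * The status-bit arithmetic in miniature, assuming the <sys/runq.h>
 * definitions: RQB_WORD(pri) is pri >> RQB_L2BPW (which word holds the
 * bit) and RQB_BIT(pri) is 1 << (pri & (RQB_BPW - 1)) (the bit within
 * that word).  With 32-bit status words (RQB_BPW == 32, RQB_L2BPW == 5),
 * priority level 37 lives in word 1, bit 5; runq_findbit() inverts the
 * mapping by adding (i << RQB_L2BPW) back onto RQB_FFS() of the first
 * non-zero word.  The function below is hypothetical.
 */
static void
example_runq_bit_math(void)
{

        /* These hold only under the 32-bit word assumption above. */
        KASSERT(RQB_WORD(37) == 1, ("priority 37 maps to word 1"));
        KASSERT(RQB_BIT(37) == 1 << 5, ("priority 37 maps to bit 5"));
}
#endif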

static __inline int
runq_findbit_from(struct runq *rq, u_char pri)
{
        struct rqbits *rqb;
        rqb_word_t mask;
        int i;

        /*
         * Set the mask for the first word so we ignore priorities before 'pri'.
         */
        mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
        rqb = &rq->rq_status;
again:
        for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
                mask = rqb->rqb_bits[i] & mask;
                if (mask == 0)
                        continue;
                pri = RQB_FFS(mask) + (i << RQB_L2BPW);
                CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
                    mask, i, pri);
                return (pri);
        }
        if (pri == 0)
                return (-1);
        /*
         * Wrap back around to the beginning of the list just once so we
         * scan the whole thing.
         */
        pri = 0;
        goto again;
}

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the thread to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct thread *td, int flags)
{
        struct rqhead *rqh;
        int pri;

        pri = td->td_priority / RQ_PPQ;
        td->td_rqindex = pri;
        runq_setbit(rq, pri);
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_add: td=%p pri=%d %d rqh=%p",
            td, td->td_priority, pri, rqh);
        if (flags & SRQ_PREEMPTED) {
                TAILQ_INSERT_HEAD(rqh, td, td_runq);
        } else {
                TAILQ_INSERT_TAIL(rqh, td, td_runq);
        }
}
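
#if 0   /* Editorial example; not part of kern_switch.c. */
/*
 * A hedged sketch of how a scheduler drives this API; the real callers
 * are sched_4bsd.c and sched_ule.c, with the appropriate run queue lock
 * held.  Priorities are grouped RQ_PPQ to a queue, so a thread at
 * priority 100 lands on queue 100 / RQ_PPQ.  The variable and function
 * below are hypothetical.
 */
static struct runq example_rq;

static void
example_runq_cycle(struct thread *td)
{
        struct thread *best;

        runq_init(&example_rq);
        runq_add(&example_rq, td, 0);           /* queue by td_priority */
        best = runq_choose(&example_rq);        /* highest priority first */
        if (best != NULL)
                runq_remove(&example_rq, best); /* caller sets state after */
}
#endif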

void
runq_add_pri(struct runq *rq, struct thread *td, u_char pri, int flags)
{
        struct rqhead *rqh;

        KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
        td->td_rqindex = pri;
        runq_setbit(rq, pri);
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_add_pri: td=%p pri=%d idx=%d rqh=%p",
            td, td->td_priority, pri, rqh);
        if (flags & SRQ_PREEMPTED) {
                TAILQ_INSERT_HEAD(rqh, td, td_runq);
        } else {
                TAILQ_INSERT_TAIL(rqh, td, td_runq);
        }
}

/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects and does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
        struct rqbits *rqb;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
                            rqb->rqb_bits[i], i);
                        return (1);
                }
        CTR0(KTR_RUNQ, "runq_check: empty");

        return (0);
}
/*
 * Find the highest priority thread on the run queue, preferring, when
 * fuzz > 1, a thread among the first few entries that last ran on the
 * current CPU.
 */
struct thread *
runq_choose_fuzz(struct runq *rq, int fuzz)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        while ((pri = runq_findbit(rq)) != -1) {
                rqh = &rq->rq_queues[pri];
                /* fuzz == 1 is normal; 0 or less is ignored. */
                if (fuzz > 1) {
                        /*
                         * In the first couple of entries, check if
                         * there is one for our CPU as a preference.
                         */
                        int count = fuzz;
                        int cpu = PCPU_GET(cpuid);
                        struct thread *td2;
                        td2 = td = TAILQ_FIRST(rqh);

                        while (count-- && td2) {
                                if (td2->td_lastcpu == cpu) {
                                        td = td2;
                                        break;
                                }
                                td2 = TAILQ_NEXT(td2, td_runq);
                        }
                } else
                        td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL, ("runq_choose_fuzz: no proc on busy queue"));
                CTR3(KTR_RUNQ,
                    "runq_choose_fuzz: pri=%d thread=%p rqh=%p", pri, td, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose_fuzz: idleproc pri=%d", pri);

        return (NULL);
}

/*
 * Find the highest priority process on the run queue.
 */
struct thread *
runq_choose(struct runq *rq)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        while ((pri = runq_findbit(rq)) != -1) {
                rqh = &rq->rq_queues[pri];
                td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
                CTR3(KTR_RUNQ,
                    "runq_choose: pri=%d thread=%p rqh=%p", pri, td, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose: idlethread pri=%d", pri);

        return (NULL);
}
struct thread *
runq_choose_from(struct runq *rq, u_char idx)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        if ((pri = runq_findbit_from(rq, idx)) != -1) {
                rqh = &rq->rq_queues[pri];
                td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL, ("runq_choose_from: no thread on busy queue"));
                CTR4(KTR_RUNQ,
                    "runq_choose_from: pri=%d thread=%p idx=%d rqh=%p",
                    pri, td, td->td_rqindex, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose_from: idlethread pri=%d", pri);

        return (NULL);
}

/*
 * Remove the thread from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set state afterwards.
 */
void
runq_remove(struct runq *rq, struct thread *td)
{

        runq_remove_idx(rq, td, NULL);
}

void
runq_remove_idx(struct runq *rq, struct thread *td, u_char *idx)
{
        struct rqhead *rqh;
        u_char pri;

        KASSERT(td->td_flags & TDF_INMEM,
            ("runq_remove_idx: thread swapped out"));
        pri = td->td_rqindex;
        KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri));
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_remove_idx: td=%p, pri=%d %d rqh=%p",
            td, td->td_priority, pri, rqh);
        TAILQ_REMOVE(rqh, td, td_runq);
        if (TAILQ_EMPTY(rqh)) {
                CTR0(KTR_RUNQ, "runq_remove_idx: empty");
                runq_clrbit(rq, pri);
                if (idx != NULL && *idx == pri)
                        *idx = (pri + 1) % RQ_NQS;
        }
}

This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.