FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_switch.c


/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>

/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
#define KTR_CRITICAL    KTR_SCHED
#else
#define KTR_CRITICAL    0
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
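
/*
 * Example of the invariant above: assuming the stock <sys/runq.h>
 * definitions, RQ_NQS is 64 run queues and RQB_BPW is the width of
 * rqb_word_t in bits, so the status bitmap needs RQB_LEN == 1 word when
 * rqb_word_t is 64 bits wide and RQB_LEN == 2 words when it is 32 bits
 * wide.  The assertion fails at compile time if the bitmap stops
 * covering exactly one bit per queue.
 */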

/*
 * kern.sched.preemption allows user space to determine if preemption support
 * is compiled in or not.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");
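
/*
 * Example usage (the OID is read-only and reflects only the
 * compile-time option):
 *
 *      $ sysctl kern.sched.preemption
 *      kern.sched.preemption: 1        <- kernel built with options PREEMPTION
 */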

/*
 * Support for scheduler stats exported via kern.sched.stats.  All stats may
 * be reset with kern.sched.stats.reset = 1.  Stats may be defined elsewhere
 * with SCHED_STAT_DEFINE().
 */
#ifdef SCHED_STATS
SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");

/* Switch reasons from mi_switch(). */
DPCPU_DEFINE(long, sched_switch_stats[SWT_COUNT]);
SCHED_STAT_DEFINE_VAR(uncategorized,
    &DPCPU_NAME(sched_switch_stats[SWT_NONE]), "");
SCHED_STAT_DEFINE_VAR(preempt,
    &DPCPU_NAME(sched_switch_stats[SWT_PREEMPT]), "");
SCHED_STAT_DEFINE_VAR(owepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_OWEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(turnstile,
    &DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), "");
SCHED_STAT_DEFINE_VAR(sleepq,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), "");
SCHED_STAT_DEFINE_VAR(sleepqtimo,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), "");
SCHED_STAT_DEFINE_VAR(relinquish,
    &DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), "");
SCHED_STAT_DEFINE_VAR(needresched,
    &DPCPU_NAME(sched_switch_stats[SWT_NEEDRESCHED]), "");
SCHED_STAT_DEFINE_VAR(idle,
    &DPCPU_NAME(sched_switch_stats[SWT_IDLE]), "");
SCHED_STAT_DEFINE_VAR(iwait,
    &DPCPU_NAME(sched_switch_stats[SWT_IWAIT]), "");
SCHED_STAT_DEFINE_VAR(suspend,
    &DPCPU_NAME(sched_switch_stats[SWT_SUSPEND]), "");
SCHED_STAT_DEFINE_VAR(remotepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(remotewakeidle,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEWAKEIDLE]), "");

static int
sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
{
        struct sysctl_oid *p;
        uintptr_t counter;
        int error;
        int val;
        int i;

        val = 0;
        error = sysctl_handle_int(oidp, &val, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        if (val == 0)
                return (0);
        /*
         * Traverse the list of children of _kern_sched_stats and reset each
         * to 0.  Skip the reset entry.
         */
        SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
                if (p == oidp || p->oid_arg1 == NULL)
                        continue;
                counter = (uintptr_t)p->oid_arg1;
                CPU_FOREACH(i) {
                        *(long *)(dpcpu_off[i] + counter) = 0;
                }
        }
        return (0);
}

SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL,
    0, sysctl_stats_reset, "I", "Reset scheduler statistics");
#endif
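
/*
 * Example usage: on a kernel built with options SCHED_STATS, writing any
 * nonzero value zeroes every per-CPU switch counter under
 * kern.sched.stats, as described in the comment above:
 *
 *      # sysctl kern.sched.stats.reset=1
 */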

/************************************************************************
 * Functions that manipulate runnability from a thread perspective.     *
 ************************************************************************/
/*
 * Select the thread that will be run next.
 */

static __noinline struct thread *
choosethread_panic(struct thread *td)
{

        /*
         * If we are in panic, only allow system threads,
         * plus the one we are running in, to be run.
         */
retry:
        if (((td->td_proc->p_flag & P_SYSTEM) == 0 &&
            (td->td_flags & TDF_INPANIC) == 0)) {
                /* Note that it is no longer on the run queue. */
                TD_SET_CAN_RUN(td);
                td = sched_choose();
                goto retry;
        }

        TD_SET_RUNNING(td);
        return (td);
}

struct thread *
choosethread(void)
{
        struct thread *td;

        td = sched_choose();

        if (__predict_false(panicstr != NULL))
                return (choosethread_panic(td));

        TD_SET_RUNNING(td);
        return (td);
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemption is not allowed.
 *
 * It might seem a good idea to inline critical_enter(), but in order
 * to prevent instruction reordering by the compiler, a __compiler_membar()
 * would have to be used here (the same as sched_pin()).  The performance
 * penalty imposed by the membar could then produce slower code than
 * the function call itself, in most cases.
 */
void
critical_enter_KBI(void)
{
#ifdef KTR
        struct thread *td = curthread;
#endif
        critical_enter();
        CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}
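
/*
 * A minimal sketch (assumed, not part of this file) of what the
 * non-logging critical_enter()/critical_exit() fast path reduces to;
 * the _KBI functions above exist so the kernel binary interface keeps
 * working even where that fast path is inlined:
 *
 *      curthread->td_critnest++;          enter: block preemption
 *      __compiler_membar();               fence against compiler reordering
 *        ... preemption-free region ...
 *      __compiler_membar();
 *      curthread->td_critnest--;          exit: allow preemption again
 *      if (curthread->td_owepreempt)      a deferred preemption is pending,
 *              critical_exit_preempt();   so switch now (see below)
 */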

void __noinline
critical_exit_preempt(void)
{
        struct thread *td;
        int flags;

        /*
         * If td_critnest is 0, it is possible that we are going to get
         * preempted again before reaching the code below.  This happens
         * rarely and is harmless.  However, this means td_owepreempt may
         * now be unset.
         */
        td = curthread;
        if (td->td_critnest != 0)
                return;
        if (kdb_active)
                return;

        /*
         * Microoptimization: we have committed to the switch, so
         * disable preemption in interrupt handlers
         * while spinning for the thread lock.
         */
        td->td_critnest = 1;
        thread_lock(td);
        td->td_critnest--;
        flags = SW_INVOL | SW_PREEMPT;
        if (TD_IS_IDLETHREAD(td))
                flags |= SWT_IDLE;
        else
                flags |= SWT_OWEPREEMPT;
        mi_switch(flags, NULL);
        thread_unlock(td);
}

void
critical_exit_KBI(void)
{
#ifdef KTR
        struct thread *td = curthread;
#endif
        critical_exit();
        CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests                             *
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
        int i;

        bzero(rq, sizeof *rq);
        for (i = 0; i < RQ_NQS; i++)
                TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}
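
/*
 * Worked example, assuming the usual <sys/runq.h> macro shapes, where
 * RQB_WORD(pri) selects word pri >> RQB_L2BPW and RQB_BIT(pri) is a
 * one-bit mask at pri modulo the word width: with a 32-bit rqb_word_t,
 * queue index 25 maps to bit 25 of word 0, while queue index 40 maps to
 * bit 8 of word 1.  runq_clrbit(rq, 40) therefore clears only that one
 * bit, leaving the other 63 queue bits untouched.
 */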

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
        struct rqbits *rqb;
        int pri;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
                        CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
                            rqb->rqb_bits[i], i, pri);
                        return (pri);
                }

        return (-1);
}
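
/*
 * Worked example: if rqb_bits[0] == 0x30, queues 4 and 5 are occupied.
 * Assuming RQB_FFS() is the usual find-first-set minus one from
 * <sys/runq.h>, it reports bit 4, so runq_findbit() returns pri == 4.
 * Lower queue indices are numerically better priorities, so the first
 * set bit found is always the best non-empty queue.
 */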

static __inline int
runq_findbit_from(struct runq *rq, u_char pri)
{
        struct rqbits *rqb;
        rqb_word_t mask;
        int i;

        /*
         * Set the mask for the first word so we ignore priorities before 'pri'.
         */
        mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
        rqb = &rq->rq_status;
again:
        for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
                mask = rqb->rqb_bits[i] & mask;
                if (mask == 0)
                        continue;
                pri = RQB_FFS(mask) + (i << RQB_L2BPW);
                CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
                    mask, i, pri);
                return (pri);
        }
        if (pri == 0)
                return (-1);
        /*
         * Wrap back around to the beginning of the list just once so we
         * scan the whole thing.
         */
        pri = 0;
        goto again;
}
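
/*
 * Worked example: runq_findbit_from(rq, 10) starts with a mask whose
 * bits 0-9 are clear, so occupied queues below index 10 are invisible
 * on the first pass.  If only queue 3 is occupied, the forward scan
 * finds nothing, pri is reset to 0, and the single wrap-around pass
 * returns 3.  The circular search lets a caller resume scanning from
 * the index where its previous scan left off.
 */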

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the thread to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct thread *td, int flags)
{
        struct rqhead *rqh;
        int pri;

        pri = td->td_priority / RQ_PPQ;
        td->td_rqindex = pri;
        runq_setbit(rq, pri);
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_add: td=%p pri=%d %d rqh=%p",
            td, td->td_priority, pri, rqh);
        if (flags & SRQ_PREEMPTED) {
                TAILQ_INSERT_HEAD(rqh, td, td_runq);
        } else {
                TAILQ_INSERT_TAIL(rqh, td, td_runq);
        }
}
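
/*
 * Worked example: with the stock RQ_PPQ == 4 priorities per queue, a
 * thread at td_priority 100 hashes to queue 100 / 4 == 25, and any
 * priority from 100 to 103 shares that queue.  A thread re-queued with
 * SRQ_PREEMPTED is put at the head, so it is the first of its priority
 * band to run again; a normally enqueued thread waits at the tail
 * behind its peers.
 */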

void
runq_add_pri(struct runq *rq, struct thread *td, u_char pri, int flags)
{
        struct rqhead *rqh;

        KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
        td->td_rqindex = pri;
        runq_setbit(rq, pri);
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_add_pri: td=%p pri=%d idx=%d rqh=%p",
            td, td->td_priority, pri, rqh);
        if (flags & SRQ_PREEMPTED) {
                TAILQ_INSERT_HEAD(rqh, td, td_runq);
        } else {
                TAILQ_INSERT_TAIL(rqh, td, td_runq);
        }
}

/*
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise.  Has no side effects and does not modify the
 * run queue structure.
 */
int
runq_check(struct runq *rq)
{
        struct rqbits *rqb;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
                            rqb->rqb_bits[i], i);
                        return (1);
                }
        CTR0(KTR_RUNQ, "runq_check: empty");

        return (0);
}

/*
 * Find the highest priority process on the run queue.
 */
struct thread *
runq_choose_fuzz(struct runq *rq, int fuzz)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        while ((pri = runq_findbit(rq)) != -1) {
                rqh = &rq->rq_queues[pri];
                /* fuzz == 1 is normal; 0 or less is ignored. */
                if (fuzz > 1) {
                        /*
                         * In the first couple of entries, check if
                         * there is one for our CPU as a preference.
                         */
                        int count = fuzz;
                        int cpu = PCPU_GET(cpuid);
                        struct thread *td2;
                        td2 = td = TAILQ_FIRST(rqh);

                        while (count-- && td2) {
                                if (td2->td_lastcpu == cpu) {
                                        td = td2;
                                        break;
                                }
                                td2 = TAILQ_NEXT(td2, td_runq);
                        }
                } else
                        td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL, ("runq_choose_fuzz: no proc on busy queue"));
                CTR3(KTR_RUNQ,
                    "runq_choose_fuzz: pri=%d thread=%p rqh=%p", pri, td, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose_fuzz: idleproc pri=%d", pri);

        return (NULL);
}
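
/*
 * Worked example: runq_choose_fuzz(rq, 2) on CPU 1 peeks at the first
 * two threads of the best non-empty queue; if either last ran on CPU 1
 * (td_lastcpu == 1), it is preferred for cache affinity, otherwise the
 * head of the queue wins.  With fuzz <= 1 the function degenerates to
 * the strict head-of-queue behavior of runq_choose() below.
 */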

/*
 * Find the highest priority process on the run queue.
 */
struct thread *
runq_choose(struct runq *rq)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        while ((pri = runq_findbit(rq)) != -1) {
                rqh = &rq->rq_queues[pri];
                td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
                CTR3(KTR_RUNQ,
                    "runq_choose: pri=%d thread=%p rqh=%p", pri, td, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose: idlethread pri=%d", pri);

        return (NULL);
}

struct thread *
runq_choose_from(struct runq *rq, u_char idx)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        if ((pri = runq_findbit_from(rq, idx)) != -1) {
                rqh = &rq->rq_queues[pri];
                td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL,
                    ("runq_choose_from: no thread on busy queue"));
                CTR4(KTR_RUNQ,
                    "runq_choose_from: pri=%d thread=%p idx=%d rqh=%p",
                    pri, td, td->td_rqindex, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose_from: idlethread pri=%d", pri);

        return (NULL);
}

/*
 * Remove the thread from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * The caller must set the thread's state afterwards.
 */
void
runq_remove(struct runq *rq, struct thread *td)
{

        runq_remove_idx(rq, td, NULL);
}

void
runq_remove_idx(struct runq *rq, struct thread *td, u_char *idx)
{
        struct rqhead *rqh;
        u_char pri;

        KASSERT(td->td_flags & TDF_INMEM,
                ("runq_remove_idx: thread swapped out"));
        pri = td->td_rqindex;
        KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri));
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_remove_idx: td=%p, pri=%d %d rqh=%p",
            td, td->td_priority, pri, rqh);
        TAILQ_REMOVE(rqh, td, td_runq);
        if (TAILQ_EMPTY(rqh)) {
                CTR0(KTR_RUNQ, "runq_remove_idx: empty");
                runq_clrbit(rq, pri);
                if (idx != NULL && *idx == pri)
                        *idx = (pri + 1) % RQ_NQS;
        }
}

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.