FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_switch.c


    1 /*-
    2  * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD: releng/11.2/sys/kern/kern_switch.c 327481 2018-01-02 00:14:46Z mjg $");
   30 
   31 #include "opt_sched.h"
   32 
   33 #include <sys/param.h>
   34 #include <sys/systm.h>
   35 #include <sys/kdb.h>
   36 #include <sys/kernel.h>
   37 #include <sys/ktr.h>
   38 #include <sys/lock.h>
   39 #include <sys/mutex.h>
   40 #include <sys/proc.h>
   41 #include <sys/queue.h>
   42 #include <sys/sched.h>
   43 #include <sys/smp.h>
   44 #include <sys/sysctl.h>
   45 
   46 #include <machine/cpu.h>
   47 
   48 /* Uncomment this to enable logging of critical_enter/exit. */
   49 #if 0
   50 #define KTR_CRITICAL    KTR_SCHED
   51 #else
   52 #define KTR_CRITICAL    0
   53 #endif
   54 
   55 #ifdef FULL_PREEMPTION
   56 #ifndef PREEMPTION
   57 #error "The FULL_PREEMPTION option requires the PREEMPTION option"
   58 #endif
   59 #endif
   60 
   61 CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
   62 
   63 /*
   64  * kern.sched.preemption allows user space to determine if preemption support
   65  * is compiled in or not.  It is not currently a boot or runtime flag that
   66  * can be changed.
   67  */
   68 #ifdef PREEMPTION
   69 static int kern_sched_preemption = 1;
   70 #else
   71 static int kern_sched_preemption = 0;
   72 #endif
   73 SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
   74     &kern_sched_preemption, 0, "Kernel preemption enabled");
   75 
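/*
 * Illustrative sketch (editorial addition, not part of kern_switch.c):
 * kern.sched.preemption is read-only, so the only way to change it is
 * to rebuild the kernel with "options PREEMPTION" (and optionally
 * "options FULL_PREEMPTION").  A userland program can query it with
 * sysctlbyname(3), for example:
 */
#if 0	/* example only (userland, not kernel code) */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int preemption;
	size_t len = sizeof(preemption);

	if (sysctlbyname("kern.sched.preemption", &preemption, &len,
	    NULL, 0) == -1)
		return (1);
	printf("kernel preemption compiled in: %s\n",
	    preemption ? "yes" : "no");
	return (0);
}
#endif
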
   76 /*
   77  * Support for scheduler stats exported via kern.sched.stats.  All stats may
   78  * be reset with kern.sched.stats.reset = 1.  Stats may be defined elsewhere
   79  * with SCHED_STAT_DEFINE().
   80  */
   81 #ifdef SCHED_STATS
   82 SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");
   83 
   84 /* Switch reasons from mi_switch(). */
   85 DPCPU_DEFINE(long, sched_switch_stats[SWT_COUNT]);
   86 SCHED_STAT_DEFINE_VAR(uncategorized,
   87     &DPCPU_NAME(sched_switch_stats[SWT_NONE]), "");
   88 SCHED_STAT_DEFINE_VAR(preempt,
   89     &DPCPU_NAME(sched_switch_stats[SWT_PREEMPT]), "");
   90 SCHED_STAT_DEFINE_VAR(owepreempt,
   91     &DPCPU_NAME(sched_switch_stats[SWT_OWEPREEMPT]), "");
   92 SCHED_STAT_DEFINE_VAR(turnstile,
   93     &DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), "");
   94 SCHED_STAT_DEFINE_VAR(sleepq,
   95     &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), "");
   96 SCHED_STAT_DEFINE_VAR(sleepqtimo,
   97     &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), "");
   98 SCHED_STAT_DEFINE_VAR(relinquish, 
   99     &DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), "");
  100 SCHED_STAT_DEFINE_VAR(needresched,
  101     &DPCPU_NAME(sched_switch_stats[SWT_NEEDRESCHED]), "");
  102 SCHED_STAT_DEFINE_VAR(idle, 
  103     &DPCPU_NAME(sched_switch_stats[SWT_IDLE]), "");
  104 SCHED_STAT_DEFINE_VAR(iwait,
  105     &DPCPU_NAME(sched_switch_stats[SWT_IWAIT]), "");
  106 SCHED_STAT_DEFINE_VAR(suspend,
  107     &DPCPU_NAME(sched_switch_stats[SWT_SUSPEND]), "");
  108 SCHED_STAT_DEFINE_VAR(remotepreempt,
  109     &DPCPU_NAME(sched_switch_stats[SWT_REMOTEPREEMPT]), "");
  110 SCHED_STAT_DEFINE_VAR(remotewakeidle,
  111     &DPCPU_NAME(sched_switch_stats[SWT_REMOTEWAKEIDLE]), "");
  112 
  113 static int
  114 sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
  115 {
  116         struct sysctl_oid *p;
  117         uintptr_t counter;
  118         int error;
  119         int val;
  120         int i;
  121 
  122         val = 0;
  123         error = sysctl_handle_int(oidp, &val, 0, req);
  124         if (error != 0 || req->newptr == NULL)
  125                 return (error);
  126         if (val == 0)
  127                 return (0);
  128         /*
  129          * Traverse the list of children of _kern_sched_stats and reset each
  130          * to 0.  Skip the reset entry.
  131          */
  132         SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
  133                 if (p == oidp || p->oid_arg1 == NULL)
  134                         continue;
  135                 counter = (uintptr_t)p->oid_arg1;
  136                 CPU_FOREACH(i) {
  137                         *(long *)(dpcpu_off[i] + counter) = 0;
  138                 }
  139         }
  140         return (0);
  141 }
  142 
  143 SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL,
  144     0, sysctl_stats_reset, "I", "Reset scheduler statistics");
  145 #endif
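
/*
 * Illustrative sketch (editorial addition, not part of kern_switch.c):
 * per the comment above, additional counters can be declared anywhere
 * in the kernel with SCHED_STAT_DEFINE() from <sys/sched.h> and bumped
 * with SCHED_STAT_INC(); both compile to nothing in kernels built
 * without "options SCHED_STATS".  The counter name and the code path
 * that increments it below are hypothetical, for illustration only.
 */
#if 0	/* example only */
SCHED_STAT_DEFINE(example_events, "Times the example path was taken");

static void
example_path(void)
{

	SCHED_STAT_INC(example_events);	/* lock-free per-CPU counter */
}
#endif
/*
 * All counters registered this way, including the DPCPU switch-reason
 * stats above, are cleared together from userland with:
 *
 *	sysctl kern.sched.stats.reset=1
 */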
  146 
  147 /************************************************************************
  148  * Functions that manipulate runnability from a thread perspective.     *
  149  ************************************************************************/
  150 /*
  151  * Select the thread that will be run next.
  152  */
  153 
  154 static __noinline struct thread *
  155 choosethread_panic(struct thread *td)
  156 {
  157 
  158         /*
  159          * If we are in panic, only allow system threads,
  160          * plus the one we are running in, to be run.
  161          */
  162 retry:
  163         if (((td->td_proc->p_flag & P_SYSTEM) == 0 &&
  164             (td->td_flags & TDF_INPANIC) == 0)) {
  165                 /* note that it is no longer on the run queue */
  166                 TD_SET_CAN_RUN(td);
  167                 td = sched_choose();
  168                 goto retry;
  169         }
  170 
  171         TD_SET_RUNNING(td);
  172         return (td);
  173 }
  174 
  175 struct thread *
  176 choosethread(void)
  177 {
  178         struct thread *td;
  179 
  180         td = sched_choose();
  181 
  182         if (__predict_false(panicstr != NULL))
  183                 return (choosethread_panic(td));
  184 
  185         TD_SET_RUNNING(td);
  186         return (td);
  187 }
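
/*
 * Illustrative sketch (editorial addition, not part of kern_switch.c):
 * choosethread() wraps the scheduler's sched_choose() and adds the
 * panic-time filtering above; since sched_choose() falls back to the
 * per-CPU idle thread, it never returns NULL.  A scheduler's
 * sched_switch() implementation uses it roughly as follows when the
 * caller did not already supply a successor (simplified sketch, not
 * the actual sched_4bsd/ULE code):
 */
#if 0	/* example only */
	if (newtd == NULL)
		newtd = choosethread();
	if (td != newtd)
		cpu_switch(td, newtd, mtx);	/* MD register/stack switch */
#endif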
  188 
  189 /*
  190  * Kernel thread preemption implementation.  Critical sections mark
  191  * regions of code in which preemptions are not allowed.
  192  *
  193  * It might seem a good idea to inline critical_enter() but, in order
  194  * to prevent instructions reordering by the compiler, a __compiler_membar()
  195  * would have to be used here (the same as sched_pin()).  The performance
  196  * penalty imposed by the membar could, then, produce slower code than
  197  * the function call itself, for most cases.
  198  */
  199 void
  200 critical_enter(void)
  201 {
  202         struct thread *td;
  203 
  204         td = curthread;
  205         td->td_critnest++;
  206         CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
  207             (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
  208 }
  209 
  210 void
  211 critical_exit(void)
  212 {
  213         struct thread *td;
  214         int flags;
  215 
  216         td = curthread;
  217         KASSERT(td->td_critnest != 0,
  218             ("critical_exit: td_critnest == 0"));
  219 
  220         if (td->td_critnest == 1) {
  221                 td->td_critnest = 0;
  222 
  223                 /*
  224                  * Interrupt handlers execute critical_exit() on
  225                  * leave, and td_owepreempt may be left set by an
  226                  * interrupt handler only when td_critnest > 0.  If we
  227                  * are decrementing td_critnest from 1 to 0, read
  228                  * td_owepreempt after decrementing, to not miss the
  229                  * preempt.  Disallow compiler to reorder operations.
  230                  */
  231                 __compiler_membar();
  232                 if (td->td_owepreempt && !kdb_active) {
  233                         /*
  234                          * Microoptimization: we committed to switch,
  235                          * disable preemption in interrupt handlers
  236                          * while spinning for the thread lock.
  237                          */
  238                         td->td_critnest = 1;
  239                         thread_lock(td);
  240                         td->td_critnest--;
  241                         flags = SW_INVOL | SW_PREEMPT;
  242                         if (TD_IS_IDLETHREAD(td))
  243                                 flags |= SWT_IDLE;
  244                         else
  245                                 flags |= SWT_OWEPREEMPT;
  246                         mi_switch(flags, NULL);
  247                         thread_unlock(td);
  248                 }
  249         } else
  250                 td->td_critnest--;
  251 
  252         CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
  253             (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
  254 }
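
/*
 * Illustrative sketch (editorial addition, not part of kern_switch.c):
 * the usual reason to use the pair above is to keep the current thread
 * on its CPU, without preemption, while it manipulates per-CPU state.
 * Sections nest via td_critnest, so calling critical_enter() again
 * inside one is fine.  The per-CPU field touched below is hypothetical.
 */
#if 0	/* example only */
	critical_enter();		/* preemption deferred from here ... */
	PCPU_SET(example_count, PCPU_GET(example_count) + 1);
	critical_exit();		/* ... to here; a pending
					 * td_owepreempt is honored now */
#endif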
  255 
  256 /************************************************************************
  257  * SYSTEM RUN QUEUE manipulations and tests                             *
  258  ************************************************************************/
  259 /*
  260  * Initialize a run structure.
  261  */
  262 void
  263 runq_init(struct runq *rq)
  264 {
  265         int i;
  266 
  267         bzero(rq, sizeof *rq);
  268         for (i = 0; i < RQ_NQS; i++)
  269                 TAILQ_INIT(&rq->rq_queues[i]);
  270 }
  271 
  272 /*
  273  * Clear the status bit of the queue corresponding to priority level pri,
  274  * indicating that it is empty.
  275  */
  276 static __inline void
  277 runq_clrbit(struct runq *rq, int pri)
  278 {
  279         struct rqbits *rqb;
  280 
  281         rqb = &rq->rq_status;
  282         CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
  283             rqb->rqb_bits[RQB_WORD(pri)],
  284             rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
  285             RQB_BIT(pri), RQB_WORD(pri));
  286         rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
  287 }
  288 
  289 /*
  290  * Find the index of the first non-empty run queue.  This is done by
  291  * scanning the status bits, a set bit indicates a non-empty queue.
  292  */
  293 static __inline int
  294 runq_findbit(struct runq *rq)
  295 {
  296         struct rqbits *rqb;
  297         int pri;
  298         int i;
  299 
  300         rqb = &rq->rq_status;
  301         for (i = 0; i < RQB_LEN; i++)
  302                 if (rqb->rqb_bits[i]) {
  303                         pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
  304                         CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
  305                             rqb->rqb_bits[i], i, pri);
  306                         return (pri);
  307                 }
  308 
  309         return (-1);
  310 }
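
/*
 * Worked example (editorial addition, not part of kern_switch.c): the
 * status bits form a small bitmap with one bit per queue, and the
 * CTASSERT near the top of the file guarantees RQB_BPW * RQB_LEN ==
 * RQ_NQS.  Assuming the common 64-bit layout from <machine/runq.h>
 * (RQ_NQS = 64, RQB_BPW = 64, RQB_L2BPW = 6, RQB_LEN = 1), queue
 * index 37 maps to
 *
 *	word = RQB_WORD(37) = 37 >> 6          = 0
 *	bit  = RQB_BIT(37)  = 1ul << (37 & 63) = 1ul << 37
 *
 * RQB_FFS() yields the zero-based index of the lowest set bit, so if
 * bit 37 is the lowest one set in word 0, runq_findbit() returns
 *
 *	pri = RQB_FFS(bits[0]) + (0 << RQB_L2BPW) = 37
 *
 * i.e. the lowest-numbered (best-priority) non-empty queue.
 */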
  311 
  312 static __inline int
  313 runq_findbit_from(struct runq *rq, u_char pri)
  314 {
  315         struct rqbits *rqb;
  316         rqb_word_t mask;
  317         int i;
  318 
  319         /*
  320          * Set the mask for the first word so we ignore priorities before 'pri'.
  321          */
  322         mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
  323         rqb = &rq->rq_status;
  324 again:
  325         for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
  326                 mask = rqb->rqb_bits[i] & mask;
  327                 if (mask == 0)
  328                         continue;
  329                 pri = RQB_FFS(mask) + (i << RQB_L2BPW);
  330                 CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
  331                     mask, i, pri);
  332                 return (pri);
  333         }
  334         if (pri == 0)
  335                 return (-1);
  336         /*
  337          * Wrap back around to the beginning of the list just once so we
  338          * scan the whole thing.
  339          */
  340         pri = 0;
  341         goto again;
  342 }
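
/*
 * Worked example (editorial addition, not part of kern_switch.c),
 * continuing the assumed 64-bit layout above: runq_findbit_from(rq, 20)
 * begins with
 *
 *	mask = (rqb_word_t)-1 << (20 & 63)
 *
 * i.e. all ones with the low 20 bits cleared, so queues 0..19 are
 * skipped on the first pass.  If no queue at index 20 or above is
 * non-empty, pri is reset to 0 and the scan wraps around exactly once,
 * so callers such as runq_choose_from() below still find work in the
 * lower-numbered queues.
 */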
  343 
  344 /*
  345  * Set the status bit of the queue corresponding to priority level pri,
  346  * indicating that it is non-empty.
  347  */
  348 static __inline void
  349 runq_setbit(struct runq *rq, int pri)
  350 {
  351         struct rqbits *rqb;
  352 
  353         rqb = &rq->rq_status;
  354         CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
  355             rqb->rqb_bits[RQB_WORD(pri)],
  356             rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
  357             RQB_BIT(pri), RQB_WORD(pri));
  358         rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
  359 }
  360 
  361 /*
  362  * Add the thread to the queue specified by its priority, and set the
  363  * corresponding status bit.
  364  */
  365 void
  366 runq_add(struct runq *rq, struct thread *td, int flags)
  367 {
  368         struct rqhead *rqh;
  369         int pri;
  370 
  371         pri = td->td_priority / RQ_PPQ;
  372         td->td_rqindex = pri;
  373         runq_setbit(rq, pri);
  374         rqh = &rq->rq_queues[pri];
  375         CTR4(KTR_RUNQ, "runq_add: td=%p pri=%d %d rqh=%p",
  376             td, td->td_priority, pri, rqh);
  377         if (flags & SRQ_PREEMPTED) {
  378                 TAILQ_INSERT_HEAD(rqh, td, td_runq);
  379         } else {
  380                 TAILQ_INSERT_TAIL(rqh, td, td_runq);
  381         }
  382 }
  383 
  384 void
  385 runq_add_pri(struct runq *rq, struct thread *td, u_char pri, int flags)
  386 {
  387         struct rqhead *rqh;
  388 
  389         KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
  390         td->td_rqindex = pri;
  391         runq_setbit(rq, pri);
  392         rqh = &rq->rq_queues[pri];
  393         CTR4(KTR_RUNQ, "runq_add_pri: td=%p pri=%d idx=%d rqh=%p",
  394             td, td->td_priority, pri, rqh);
  395         if (flags & SRQ_PREEMPTED) {
  396                 TAILQ_INSERT_HEAD(rqh, td, td_runq);
  397         } else {
  398                 TAILQ_INSERT_TAIL(rqh, td, td_runq);
  399         }
  400 }
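
/*
 * Worked example (editorial addition, not part of kern_switch.c):
 * runq_add() packs RQ_PPQ consecutive priority levels into one queue.
 * Assuming the values from <sys/runq.h> (RQ_PPQ = 4, RQ_NQS = 64), a
 * thread with td_priority 120 lands in queue index 120 / 4 = 30, and
 * priorities 120..123 all share that queue.  runq_add_pri() skips the
 * division and lets the caller supply a precomputed index, which is
 * how a scheduler can impose its own priority-to-queue mapping.
 */
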
  401 /*
  402  * Return true if there are runnable processes of any priority on the run
  403  * queue, false otherwise.  Has no side effects, does not modify the run
  404  * queue structure.
  405  */
  406 int
  407 runq_check(struct runq *rq)
  408 {
  409         struct rqbits *rqb;
  410         int i;
  411 
  412         rqb = &rq->rq_status;
  413         for (i = 0; i < RQB_LEN; i++)
  414                 if (rqb->rqb_bits[i]) {
  415                         CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
  416                             rqb->rqb_bits[i], i);
  417                         return (1);
  418                 }
  419         CTR0(KTR_RUNQ, "runq_check: empty");
  420 
  421         return (0);
  422 }
  423 
  424 /*
  425  * Find the highest priority process on the run queue.
  426  */
  427 struct thread *
  428 runq_choose_fuzz(struct runq *rq, int fuzz)
  429 {
  430         struct rqhead *rqh;
  431         struct thread *td;
  432         int pri;
  433 
  434         while ((pri = runq_findbit(rq)) != -1) {
  435                 rqh = &rq->rq_queues[pri];
  436                 /* fuzz == 1 is normal.. 0 or less are ignored */
  437                 if (fuzz > 1) {
  438                         /*
  439                          * In the first couple of entries, check if
  440                          * there is one for our CPU as a preference.
  441                          */
  442                         int count = fuzz;
  443                         int cpu = PCPU_GET(cpuid);
  444                         struct thread *td2;
  445                         td2 = td = TAILQ_FIRST(rqh);
  446 
  447                         while (count-- && td2) {
  448                                 if (td2->td_lastcpu == cpu) {
  449                                         td = td2;
  450                                         break;
  451                                 }
  452                                 td2 = TAILQ_NEXT(td2, td_runq);
  453                         }
  454                 } else
  455                         td = TAILQ_FIRST(rqh);
  456                 KASSERT(td != NULL, ("runq_choose_fuzz: no proc on busy queue"));
  457                 CTR3(KTR_RUNQ,
  458                     "runq_choose_fuzz: pri=%d thread=%p rqh=%p", pri, td, rqh);
  459                 return (td);
  460         }
  461         CTR1(KTR_RUNQ, "runq_choose_fuzz: idleproc pri=%d", pri);
  462 
  463         return (NULL);
  464 }
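
/*
 * Illustrative sketch (editorial addition, not part of kern_switch.c):
 * the fuzz argument trades strict FIFO order within the best queue for
 * CPU affinity.  With fuzz = 4, up to the first four threads of that
 * queue are inspected and one whose td_lastcpu matches the current CPU
 * is preferred.  The value 4 below is an arbitrary example; callers
 * normally pass a small tunable.
 */
#if 0	/* example only */
	td = runq_choose_fuzz(rq, 4);
#endif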
  465 
  466 /*
  467  * Find the highest priority process on the run queue.
  468  */
  469 struct thread *
  470 runq_choose(struct runq *rq)
  471 {
  472         struct rqhead *rqh;
  473         struct thread *td;
  474         int pri;
  475 
  476         while ((pri = runq_findbit(rq)) != -1) {
  477                 rqh = &rq->rq_queues[pri];
  478                 td = TAILQ_FIRST(rqh);
  479                 KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
  480                 CTR3(KTR_RUNQ,
  481                     "runq_choose: pri=%d thread=%p rqh=%p", pri, td, rqh);
  482                 return (td);
  483         }
  484         CTR1(KTR_RUNQ, "runq_choose: idlethread pri=%d", pri);
  485 
  486         return (NULL);
  487 }
  488 
  489 struct thread *
  490 runq_choose_from(struct runq *rq, u_char idx)
  491 {
  492         struct rqhead *rqh;
  493         struct thread *td;
  494         int pri;
  495 
  496         if ((pri = runq_findbit_from(rq, idx)) != -1) {
  497                 rqh = &rq->rq_queues[pri];
  498                 td = TAILQ_FIRST(rqh);
  499                 KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
  500                 CTR4(KTR_RUNQ,
  501                     "runq_choose_from: pri=%d thread=%p idx=%d rqh=%p",
  502                     pri, td, td->td_rqindex, rqh);
  503                 return (td);
  504         }
  505         CTR1(KTR_RUNQ, "runq_choose_from: idlethread pri=%d", pri);
  506 
  507         return (NULL);
  508 }
  509 /*
  510  * Remove the thread from the queue specified by its priority, and clear the
  511  * corresponding status bit if the queue becomes empty.
  512  * Caller must set state afterwards.
  513  */
  514 void
  515 runq_remove(struct runq *rq, struct thread *td)
  516 {
  517 
  518         runq_remove_idx(rq, td, NULL);
  519 }
  520 
  521 void
  522 runq_remove_idx(struct runq *rq, struct thread *td, u_char *idx)
  523 {
  524         struct rqhead *rqh;
  525         u_char pri;
  526 
  527         KASSERT(td->td_flags & TDF_INMEM,
  528                 ("runq_remove_idx: thread swapped out"));
  529         pri = td->td_rqindex;
  530         KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri));
  531         rqh = &rq->rq_queues[pri];
  532         CTR4(KTR_RUNQ, "runq_remove_idx: td=%p, pri=%d %d rqh=%p",
  533             td, td->td_priority, pri, rqh);
  534         TAILQ_REMOVE(rqh, td, td_runq);
  535         if (TAILQ_EMPTY(rqh)) {
  536                 CTR0(KTR_RUNQ, "runq_remove_idx: empty");
  537                 runq_clrbit(rq, pri);
  538                 if (idx != NULL && *idx == pri)
  539                         *idx = (pri + 1) % RQ_NQS;
  540         }
  541 }
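
/*
 * Illustrative sketch (editorial addition, not part of kern_switch.c):
 * as the comment above runq_remove() says, the caller owns the state
 * transition of the thread it pulls off the queue.  A typical pattern,
 * with locking and scheduler bookkeeping omitted, is:
 */
#if 0	/* example only */
	runq_remove(rq, td);	/* unlink td; clears the status bit if the
				 * queue becomes empty */
	TD_SET_CAN_RUN(td);	/* caller sets the new thread state */
#endif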
