FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_switch.c


    1 /*-
    2  * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD$");
   30 
   31 #include "opt_sched.h"
   32 
   33 #include <sys/param.h>
   34 #include <sys/systm.h>
   35 #include <sys/kdb.h>
   36 #include <sys/kernel.h>
   37 #include <sys/ktr.h>
   38 #include <sys/lock.h>
   39 #include <sys/mutex.h>
   40 #include <sys/proc.h>
   41 #include <sys/queue.h>
   42 #include <sys/sched.h>
   43 #include <sys/smp.h>
   44 #include <sys/sysctl.h>
   45 
   46 #include <machine/cpu.h>
   47 
    48 /* Change the "#if 0" below to "#if 1" to enable logging of critical_enter/exit. */
   49 #if 0
   50 #define KTR_CRITICAL    KTR_SCHED
   51 #else
   52 #define KTR_CRITICAL    0
   53 #endif
   54 
   55 #ifdef FULL_PREEMPTION
   56 #ifndef PREEMPTION
   57 #error "The FULL_PREEMPTION option requires the PREEMPTION option"
   58 #endif
   59 #endif
   60 
   61 CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
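
/*
 * Illustrative note, not from the original source: the assertion above
 * guarantees one status bit per run queue.  With 32-bit status words
 * (RQB_BPW == 32) and the customary RQ_NQS == 64, for instance, RQB_LEN
 * must be 2.
 */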
   62 
   63 /*
   64  * kern.sched.preemption allows user space to determine if preemption support
   65  * is compiled in or not.  It is not currently a boot or runtime flag that
   66  * can be changed.
   67  */
   68 #ifdef PREEMPTION
   69 static int kern_sched_preemption = 1;
   70 #else
   71 static int kern_sched_preemption = 0;
   72 #endif
   73 SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
   74     &kern_sched_preemption, 0, "Kernel preemption enabled");
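
/*
 * Illustrative usage, not from the original source: the read-only OID
 * defined above can be inspected from user space with sysctl(8):
 *
 *     $ sysctl kern.sched.preemption
 *     kern.sched.preemption: 1
 *
 * A value of 1 only reports that the kernel was compiled with
 * "options PREEMPTION"; the setting cannot be changed at boot or run time.
 */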
   75 
   76 /*
   77  * Support for scheduler stats exported via kern.sched.stats.  All stats may
   78  * be reset with kern.sched.stats.reset = 1.  Stats may be defined elsewhere
   79  * with SCHED_STAT_DEFINE().
   80  */
   81 #ifdef SCHED_STATS
   82 SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");
   83 
   84 /* Switch reasons from mi_switch(). */
   85 DPCPU_DEFINE(long, sched_switch_stats[SWT_COUNT]);
   86 SCHED_STAT_DEFINE_VAR(uncategorized,
   87     &DPCPU_NAME(sched_switch_stats[SWT_NONE]), "");
   88 SCHED_STAT_DEFINE_VAR(preempt,
   89     &DPCPU_NAME(sched_switch_stats[SWT_PREEMPT]), "");
   90 SCHED_STAT_DEFINE_VAR(owepreempt,
   91     &DPCPU_NAME(sched_switch_stats[SWT_OWEPREEMPT]), "");
   92 SCHED_STAT_DEFINE_VAR(turnstile,
   93     &DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), "");
   94 SCHED_STAT_DEFINE_VAR(sleepq,
   95     &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), "");
   96 SCHED_STAT_DEFINE_VAR(sleepqtimo,
   97     &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), "");
   98 SCHED_STAT_DEFINE_VAR(relinquish, 
   99     &DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), "");
  100 SCHED_STAT_DEFINE_VAR(needresched,
  101     &DPCPU_NAME(sched_switch_stats[SWT_NEEDRESCHED]), "");
  102 SCHED_STAT_DEFINE_VAR(idle, 
  103     &DPCPU_NAME(sched_switch_stats[SWT_IDLE]), "");
  104 SCHED_STAT_DEFINE_VAR(iwait,
  105     &DPCPU_NAME(sched_switch_stats[SWT_IWAIT]), "");
  106 SCHED_STAT_DEFINE_VAR(suspend,
  107     &DPCPU_NAME(sched_switch_stats[SWT_SUSPEND]), "");
  108 SCHED_STAT_DEFINE_VAR(remotepreempt,
  109     &DPCPU_NAME(sched_switch_stats[SWT_REMOTEPREEMPT]), "");
  110 SCHED_STAT_DEFINE_VAR(remotewakeidle,
  111     &DPCPU_NAME(sched_switch_stats[SWT_REMOTEWAKEIDLE]), "");
  112 
  113 static int
  114 sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
  115 {
  116         struct sysctl_oid *p;
  117         uintptr_t counter;
  118         int error;
  119         int val;
  120         int i;
  121 
  122         val = 0;
  123         error = sysctl_handle_int(oidp, &val, 0, req);
  124         if (error != 0 || req->newptr == NULL)
  125                 return (error);
  126         if (val == 0)
  127                 return (0);
  128         /*
  129          * Traverse the list of children of _kern_sched_stats and reset each
  130          * to 0.  Skip the reset entry.
  131          */
  132         SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
  133                 if (p == oidp || p->oid_arg1 == NULL)
  134                         continue;
  135                 counter = (uintptr_t)p->oid_arg1;
  136                 CPU_FOREACH(i) {
  137                         *(long *)(dpcpu_off[i] + counter) = 0;
  138                 }
  139         }
  140         return (0);
  141 }
  142 
  143 SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL,
  144     0, sysctl_stats_reset, "I", "Reset scheduler statistics");
  145 #endif
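
/*
 * Illustrative usage, not from the original source: with a kernel built
 * with "options SCHED_STATS", the per-CPU switch counters appear under
 * kern.sched.stats and can all be cleared through the write-only handler
 * above, e.g.:
 *
 *     # sysctl kern.sched.stats.reset=1
 *
 * The handler walks every sibling OID of "reset", treats each oid_arg1 as
 * a DPCPU offset, and zeroes the corresponding counter on every CPU.
 */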
  146 
  147 /************************************************************************
  148  * Functions that manipulate runnability from a thread perspective.     *
  149  ************************************************************************/
  150 /*
  151  * Select the thread that will be run next.
  152  */
  153 struct thread *
  154 choosethread(void)
  155 {
  156         struct thread *td;
  157 
  158 retry:
  159         td = sched_choose();
  160 
  161         /*
  162          * If we are in panic, only allow system threads,
  163          * plus the one we are running in, to be run.
  164          */
  165         if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
  166             (td->td_flags & TDF_INPANIC) == 0)) {
  167                 /* note that it is no longer on the run queue */
  168                 TD_SET_CAN_RUN(td);
  169                 goto retry;
  170         }
  171 
  172         TD_SET_RUNNING(td);
  173         return (td);
  174 }
  175 
  176 /*
  177  * Kernel thread preemption implementation.  Critical sections mark
  178  * regions of code in which preemptions are not allowed.
  179  *
  180  * It might seem a good idea to inline critical_enter() but, in order
  181  * to prevent instructions reordering by the compiler, a __compiler_membar()
  182  * would have to be used here (the same as sched_pin()).  The performance
  183  * penalty imposed by the membar could, then, produce slower code than
  184  * the function call itself, for most cases.
  185  */
  186 void
  187 critical_enter(void)
  188 {
  189         struct thread *td;
  190 
  191         td = curthread;
  192         td->td_critnest++;
  193         CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
  194             (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
  195 }
  196 
  197 void
  198 critical_exit(void)
  199 {
  200         struct thread *td;
  201         int flags;
  202 
  203         td = curthread;
  204         KASSERT(td->td_critnest != 0,
  205             ("critical_exit: td_critnest == 0"));
  206 
  207         if (td->td_critnest == 1) {
  208                 td->td_critnest = 0;
  209 
  210                 /*
  211                  * Interrupt handlers execute critical_exit() on
  212                  * leave, and td_owepreempt may be left set by an
  213                  * interrupt handler only when td_critnest > 0.  If we
  214                  * are decrementing td_critnest from 1 to 0, read
  215                  * td_owepreempt after decrementing, to not miss the
  216                  * preempt.  Disallow compiler to reorder operations.
  217                  */
  218                 __compiler_membar();
  219                 if (td->td_owepreempt && !kdb_active) {
  220                         /*
  221                          * Microoptimization: we committed to switch,
  222                          * disable preemption in interrupt handlers
  223                          * while spinning for the thread lock.
  224                          */
  225                         td->td_critnest = 1;
  226                         thread_lock(td);
  227                         td->td_critnest--;
  228                         flags = SW_INVOL | SW_PREEMPT;
  229                         if (TD_IS_IDLETHREAD(td))
  230                                 flags |= SWT_IDLE;
  231                         else
  232                                 flags |= SWT_OWEPREEMPT;
  233                         mi_switch(flags, NULL);
  234                         thread_unlock(td);
  235                 }
  236         } else
  237                 td->td_critnest--;
  238 
  239         CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
  240             (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
  241 }
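
/*
 * Illustrative usage sketch, not from the original source: callers bracket
 * code that must not be preempted with critical_enter()/critical_exit().
 * Nesting is safe because td_critnest counts the depth, and only the
 * outermost exit (1 -> 0) honors a deferred td_owepreempt:
 *
 *     critical_enter();      (td_critnest 0 -> 1)
 *     critical_enter();      (nested, 1 -> 2)
 *     ...                    (no involuntary switch happens in here)
 *     critical_exit();       (2 -> 1, just decrements)
 *     critical_exit();       (1 -> 0, may call mi_switch() if one is owed)
 */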
  242 
  243 /************************************************************************
  244  * SYSTEM RUN QUEUE manipulations and tests                             *
  245  ************************************************************************/
  246 /*
  247  * Initialize a run structure.
  248  */
  249 void
  250 runq_init(struct runq *rq)
  251 {
  252         int i;
  253 
  254         bzero(rq, sizeof *rq);
  255         for (i = 0; i < RQ_NQS; i++)
  256                 TAILQ_INIT(&rq->rq_queues[i]);
  257 }
  258 
  259 /*
  260  * Clear the status bit of the queue corresponding to priority level pri,
  261  * indicating that it is empty.
  262  */
  263 static __inline void
  264 runq_clrbit(struct runq *rq, int pri)
  265 {
  266         struct rqbits *rqb;
  267 
  268         rqb = &rq->rq_status;
  269         CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
  270             rqb->rqb_bits[RQB_WORD(pri)],
  271             rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
  272             RQB_BIT(pri), RQB_WORD(pri));
  273         rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
  274 }
  275 
  276 /*
  277  * Find the index of the first non-empty run queue.  This is done by
  278  * scanning the status bits, a set bit indicates a non-empty queue.
  279  */
  280 static __inline int
  281 runq_findbit(struct runq *rq)
  282 {
  283         struct rqbits *rqb;
  284         int pri;
  285         int i;
  286 
  287         rqb = &rq->rq_status;
  288         for (i = 0; i < RQB_LEN; i++)
  289                 if (rqb->rqb_bits[i]) {
  290                         pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
  291                         CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
  292                             rqb->rqb_bits[i], i, pri);
  293                         return (pri);
  294                 }
  295 
  296         return (-1);
  297 }
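
/*
 * Worked example, not from the original source: words are scanned from
 * index 0 upward and RQB_FFS() yields the zero-based index of the lowest
 * set bit, so the result is the smallest (i.e. best-priority) non-empty
 * queue.  Assuming 32-bit status words (RQB_BPW == 32, RQB_L2BPW == 5):
 * if word 0 is empty and word 1 has bit 3 set, pri = 3 + (1 << 5) = 35.
 */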
  298 
  299 static __inline int
  300 runq_findbit_from(struct runq *rq, u_char pri)
  301 {
  302         struct rqbits *rqb;
  303         rqb_word_t mask;
  304         int i;
  305 
  306         /*
  307          * Set the mask for the first word so we ignore priorities before 'pri'.
  308          */
  309         mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
  310         rqb = &rq->rq_status;
  311 again:
  312         for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
  313                 mask = rqb->rqb_bits[i] & mask;
  314                 if (mask == 0)
  315                         continue;
  316                 pri = RQB_FFS(mask) + (i << RQB_L2BPW);
  317                 CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
  318                     mask, i, pri);
  319                 return (pri);
  320         }
  321         if (pri == 0)
  322                 return (-1);
  323         /*
  324          * Wrap back around to the beginning of the list just once so we
  325          * scan the whole thing.
  326          */
  327         pri = 0;
  328         goto again;
  329 }
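
/*
 * Worked example, not from the original source: the initial mask discards
 * bits below the starting priority.  With 32-bit words and pri == 35,
 * RQB_WORD(35) == 1 and the mask is ~0 << (35 & 31) == ~0 << 3, so queues
 * 32-34 are skipped on the first pass; subsequent words use a full mask,
 * and if nothing is found the search wraps back to pri 0 exactly once.
 */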
  330 
  331 /*
  332  * Set the status bit of the queue corresponding to priority level pri,
  333  * indicating that it is non-empty.
  334  */
  335 static __inline void
  336 runq_setbit(struct runq *rq, int pri)
  337 {
  338         struct rqbits *rqb;
  339 
  340         rqb = &rq->rq_status;
  341         CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
  342             rqb->rqb_bits[RQB_WORD(pri)],
  343             rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
  344             RQB_BIT(pri), RQB_WORD(pri));
  345         rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
  346 }
  347 
  348 /*
  349  * Add the thread to the queue specified by its priority, and set the
  350  * corresponding status bit.
  351  */
  352 void
  353 runq_add(struct runq *rq, struct thread *td, int flags)
  354 {
  355         struct rqhead *rqh;
  356         int pri;
  357 
  358         pri = td->td_priority / RQ_PPQ;
  359         td->td_rqindex = pri;
  360         runq_setbit(rq, pri);
  361         rqh = &rq->rq_queues[pri];
  362         CTR4(KTR_RUNQ, "runq_add: td=%p pri=%d %d rqh=%p",
  363             td, td->td_priority, pri, rqh);
  364         if (flags & SRQ_PREEMPTED) {
  365                 TAILQ_INSERT_HEAD(rqh, td, td_runq);
  366         } else {
  367                 TAILQ_INSERT_TAIL(rqh, td, td_runq);
  368         }
  369 }
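
/*
 * Illustrative note, not from the original source: td_priority is divided
 * by RQ_PPQ, so several adjacent priorities share one queue.  With the
 * customary <sys/runq.h> values (RQ_PPQ == 4, RQ_NQS == 64, priorities
 * 0-255), a thread at priority 100 lands on queue 100 / 4 == 25.  Threads
 * queued with SRQ_PREEMPTED go to the head of the queue, so a preempted
 * thread runs again before its peers at the same priority.
 */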
  370 
  371 void
  372 runq_add_pri(struct runq *rq, struct thread *td, u_char pri, int flags)
  373 {
  374         struct rqhead *rqh;
  375 
  376         KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
  377         td->td_rqindex = pri;
  378         runq_setbit(rq, pri);
  379         rqh = &rq->rq_queues[pri];
  380         CTR4(KTR_RUNQ, "runq_add_pri: td=%p pri=%d idx=%d rqh=%p",
  381             td, td->td_priority, pri, rqh);
  382         if (flags & SRQ_PREEMPTED) {
  383                 TAILQ_INSERT_HEAD(rqh, td, td_runq);
  384         } else {
  385                 TAILQ_INSERT_TAIL(rqh, td, td_runq);
  386         }
  387 }
  388 /*
  389  * Return true if there are runnable processes of any priority on the run
  390  * queue, false otherwise.  Has no side effects, does not modify the run
  391  * queue structure.
  392  */
  393 int
  394 runq_check(struct runq *rq)
  395 {
  396         struct rqbits *rqb;
  397         int i;
  398 
  399         rqb = &rq->rq_status;
  400         for (i = 0; i < RQB_LEN; i++)
  401                 if (rqb->rqb_bits[i]) {
  402                         CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
  403                             rqb->rqb_bits[i], i);
  404                         return (1);
  405                 }
  406         CTR0(KTR_RUNQ, "runq_check: empty");
  407 
  408         return (0);
  409 }
  410 
  411 /*
  412  * Find the highest priority process on the run queue.
  413  */
  414 struct thread *
  415 runq_choose_fuzz(struct runq *rq, int fuzz)
  416 {
  417         struct rqhead *rqh;
  418         struct thread *td;
  419         int pri;
  420 
  421         while ((pri = runq_findbit(rq)) != -1) {
  422                 rqh = &rq->rq_queues[pri];
   423                 /* fuzz == 1 is normal; 0 or less are ignored. */
  424                 if (fuzz > 1) {
  425                         /*
  426                          * In the first couple of entries, check if
  427                          * there is one for our CPU as a preference.
  428                          */
  429                         int count = fuzz;
  430                         int cpu = PCPU_GET(cpuid);
  431                         struct thread *td2;
  432                         td2 = td = TAILQ_FIRST(rqh);
  433 
  434                         while (count-- && td2) {
  435                                 if (td2->td_lastcpu == cpu) {
  436                                         td = td2;
  437                                         break;
  438                                 }
  439                                 td2 = TAILQ_NEXT(td2, td_runq);
  440                         }
  441                 } else
  442                         td = TAILQ_FIRST(rqh);
  443                 KASSERT(td != NULL, ("runq_choose_fuzz: no proc on busy queue"));
  444                 CTR3(KTR_RUNQ,
  445                     "runq_choose_fuzz: pri=%d thread=%p rqh=%p", pri, td, rqh);
  446                 return (td);
  447         }
  448         CTR1(KTR_RUNQ, "runq_choose_fuzz: idleproc pri=%d", pri);
  449 
  450         return (NULL);
  451 }
  452 
  453 /*
  454  * Find the highest priority process on the run queue.
  455  */
  456 struct thread *
  457 runq_choose(struct runq *rq)
  458 {
  459         struct rqhead *rqh;
  460         struct thread *td;
  461         int pri;
  462 
  463         while ((pri = runq_findbit(rq)) != -1) {
  464                 rqh = &rq->rq_queues[pri];
  465                 td = TAILQ_FIRST(rqh);
  466                 KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
  467                 CTR3(KTR_RUNQ,
  468                     "runq_choose: pri=%d thread=%p rqh=%p", pri, td, rqh);
  469                 return (td);
  470         }
  471         CTR1(KTR_RUNQ, "runq_choose: idlethread pri=%d", pri);
  472 
  473         return (NULL);
  474 }
  475 
  476 struct thread *
  477 runq_choose_from(struct runq *rq, u_char idx)
  478 {
  479         struct rqhead *rqh;
  480         struct thread *td;
  481         int pri;
  482 
  483         if ((pri = runq_findbit_from(rq, idx)) != -1) {
  484                 rqh = &rq->rq_queues[pri];
  485                 td = TAILQ_FIRST(rqh);
  486                 KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
  487                 CTR4(KTR_RUNQ,
  488                     "runq_choose_from: pri=%d thread=%p idx=%d rqh=%p",
  489                     pri, td, td->td_rqindex, rqh);
  490                 return (td);
  491         }
  492         CTR1(KTR_RUNQ, "runq_choose_from: idlethread pri=%d", pri);
  493 
  494         return (NULL);
  495 }
  496 /*
  497  * Remove the thread from the queue specified by its priority, and clear the
  498  * corresponding status bit if the queue becomes empty.
  499  * Caller must set state afterwards.
  500  */
  501 void
  502 runq_remove(struct runq *rq, struct thread *td)
  503 {
  504 
  505         runq_remove_idx(rq, td, NULL);
  506 }
  507 
  508 void
  509 runq_remove_idx(struct runq *rq, struct thread *td, u_char *idx)
  510 {
  511         struct rqhead *rqh;
  512         u_char pri;
  513 
  514         KASSERT(td->td_flags & TDF_INMEM,
  515                 ("runq_remove_idx: thread swapped out"));
  516         pri = td->td_rqindex;
  517         KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri));
  518         rqh = &rq->rq_queues[pri];
  519         CTR4(KTR_RUNQ, "runq_remove_idx: td=%p, pri=%d %d rqh=%p",
  520             td, td->td_priority, pri, rqh);
  521         TAILQ_REMOVE(rqh, td, td_runq);
  522         if (TAILQ_EMPTY(rqh)) {
  523                 CTR0(KTR_RUNQ, "runq_remove_idx: empty");
  524                 runq_clrbit(rq, pri);
  525                 if (idx != NULL && *idx == pri)
  526                         *idx = (pri + 1) % RQ_NQS;
  527         }
  528 }
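
/*
 * Illustrative note, not from the original source: if the removal empties
 * the queue and the caller's index hint pointed at it, the hint is
 * advanced to (pri + 1) % RQ_NQS, so a caller that keeps a circular scan
 * position for runq_choose_from() is left with a valid starting index.
 */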
