FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_switch.c


/*-
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/8.1/sys/kern/kern_switch.c 199583 2009-11-20 15:27:52Z jhb $");

#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>

/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
#define KTR_CRITICAL    KTR_SCHED
#else
#define KTR_CRITICAL    0
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
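
/*
 * Editorial note: the assertion above checks that the status bitmap exactly
 * covers the run queues.  With the stock definitions in sys/runq.h (an
 * assumption here: RQ_NQS == 64 queue levels and 32-bit bitmap words, so
 * RQB_BPW == 32 and RQB_LEN == 2), it reduces to 32 * 2 == 64.
 */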

/*
 * kern.sched.preemption lets user space determine whether preemption
 * support is compiled in.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");
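
/*
 * Editorial sketch (not part of the original file): because the OID is
 * read-only, user space can only query it, e.g. with sysctl(8) or, from C,
 * with sysctlbyname(3):
 *
 *      int on;
 *      size_t len = sizeof(on);
 *
 *      if (sysctlbyname("kern.sched.preemption", &on, &len, NULL, 0) == 0)
 *              printf("kernel preemption %s\n", on ? "enabled" : "disabled");
 */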

/*
 * Support for scheduler stats exported via kern.sched.stats.  All stats may
 * be reset with kern.sched.stats.reset = 1.  Stats may be defined elsewhere
 * with SCHED_STAT_DEFINE().
 */
#ifdef SCHED_STATS
SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");

/* Switch reasons from mi_switch(). */
DPCPU_DEFINE(long, sched_switch_stats[SWT_COUNT]);
SCHED_STAT_DEFINE_VAR(uncategorized,
    &DPCPU_NAME(sched_switch_stats[SWT_NONE]), "");
SCHED_STAT_DEFINE_VAR(preempt,
    &DPCPU_NAME(sched_switch_stats[SWT_PREEMPT]), "");
SCHED_STAT_DEFINE_VAR(owepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_OWEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(turnstile,
    &DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), "");
SCHED_STAT_DEFINE_VAR(sleepq,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), "");
SCHED_STAT_DEFINE_VAR(sleepqtimo,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), "");
SCHED_STAT_DEFINE_VAR(relinquish,
    &DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), "");
SCHED_STAT_DEFINE_VAR(needresched,
    &DPCPU_NAME(sched_switch_stats[SWT_NEEDRESCHED]), "");
SCHED_STAT_DEFINE_VAR(idle,
    &DPCPU_NAME(sched_switch_stats[SWT_IDLE]), "");
SCHED_STAT_DEFINE_VAR(iwait,
    &DPCPU_NAME(sched_switch_stats[SWT_IWAIT]), "");
SCHED_STAT_DEFINE_VAR(suspend,
    &DPCPU_NAME(sched_switch_stats[SWT_SUSPEND]), "");
SCHED_STAT_DEFINE_VAR(remotepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(remotewakeidle,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEWAKEIDLE]), "");

static int
sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
{
        struct sysctl_oid *p;
        uintptr_t counter;
        int error;
        int val;
        int i;

        val = 0;
        error = sysctl_handle_int(oidp, &val, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        if (val == 0)
                return (0);
        /*
         * Traverse the list of children of _kern_sched_stats and reset each
         * to 0.  Skip the reset entry.
         */
        SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
                if (p == oidp || p->oid_arg1 == NULL)
                        continue;
                counter = (uintptr_t)p->oid_arg1;
                for (i = 0; i <= mp_maxid; i++) {
                        if (CPU_ABSENT(i))
                                continue;
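                        /*
                         * Editorial note: a DPCPU counter lives at a fixed
                         * offset (stashed in oid_arg1 above) within each
                         * CPU's per-CPU region, so CPU i's copy sits at
                         * dpcpu_off[i] + counter.
                         */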
                        *(long *)(dpcpu_off[i] + counter) = 0;
                }
        }
        return (0);
}

SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL,
    0, sysctl_stats_reset, "I", "Reset scheduler statistics");
#endif

/************************************************************************
 * Functions that manipulate runnability from a thread perspective.     *
 ************************************************************************/
/*
 * Select the thread that will be run next.
 */
struct thread *
choosethread(void)
{
        struct thread *td;

retry:
        td = sched_choose();

        /*
         * If we are panicking, only allow system threads and the thread
         * we are running in to be scheduled.
         */
        if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
            (td->td_flags & TDF_INPANIC) == 0)) {
                /* Note that it is no longer on the run queue. */
                TD_SET_CAN_RUN(td);
                goto retry;
        }

        TD_SET_RUNNING(td);
        return (td);
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 */
void
critical_enter(void)
{
        struct thread *td;

        td = curthread;
        td->td_critnest++;
        CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

void
critical_exit(void)
{
        struct thread *td;
        int flags;

        td = curthread;
        KASSERT(td->td_critnest != 0,
            ("critical_exit: td_critnest == 0"));

        if (td->td_critnest == 1) {
                td->td_critnest = 0;
                if (td->td_owepreempt) {
                        td->td_critnest = 1;
                        thread_lock(td);
                        td->td_critnest--;
                        flags = SW_INVOL | SW_PREEMPT;
                        if (TD_IS_IDLETHREAD(td))
                                flags |= SWT_IDLE;
                        else
                                flags |= SWT_OWEPREEMPT;
                        mi_switch(flags, NULL);
                        thread_unlock(td);
                }
        } else
                td->td_critnest--;

        CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}
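
/*
 * Editorial sketch (not part of the original file): a critical section is
 * the usual way to keep the current thread from being preempted, and hence
 * from migrating, while it works on per-CPU state:
 *
 *      critical_enter();
 *      cpu = PCPU_GET(cpuid);
 *      ... CPU-local data is safe here: no preemption, no migration ...
 *      critical_exit();
 *
 * Sections nest via td_critnest; a preemption deferred while inside one
 * (td_owepreempt) is honored only when the outermost critical_exit()
 * drops the count to zero, as implemented above.
 */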

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests                             *
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
        int i;

        bzero(rq, sizeof *rq);
        for (i = 0; i < RQ_NQS; i++)
                TAILQ_INIT(&rq->rq_queues[i]);
}
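
/*
 * Editorial note: struct runq (declared in sys/runq.h) pairs an array of
 * RQ_NQS tail queues, one per priority level, with the rq_status bitmap
 * manipulated below, in which each set bit marks a non-empty queue.  That
 * turns "find the best runnable thread" into a find-first-set over a few
 * words instead of a walk of every queue.
 */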

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
        struct rqbits *rqb;
        int pri;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
                        CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
                            rqb->rqb_bits[i], i, pri);
                        return (pri);
                }

        return (-1);
}
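
/*
 * Editorial example: assuming 32-bit bitmap words (RQB_BPW == 32, so
 * RQB_L2BPW == 5), a thread queued at priority level 37 sets bit 5 of
 * word 1, and runq_findbit() recovers the level as
 *
 *      RQB_FFS(rqb_bits[1]) + (1 << RQB_L2BPW) == 5 + 32 == 37
 *
 * provided no better (lower-numbered) level has a bit set before it.
 */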

static __inline int
runq_findbit_from(struct runq *rq, u_char pri)
{
        struct rqbits *rqb;
        rqb_word_t mask;
        int i;

        /*
         * Set the mask for the first word so we ignore priorities before 'pri'.
         */
        mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
        rqb = &rq->rq_status;
again:
        for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
                mask = rqb->rqb_bits[i] & mask;
                if (mask == 0)
                        continue;
                pri = RQB_FFS(mask) + (i << RQB_L2BPW);
                CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
                    mask, i, pri);
                return (pri);
        }
        if (pri == 0)
                return (-1);
        /*
         * Wrap back around to the beginning of the list just once so we
         * scan the whole thing.
         */
        pri = 0;
        goto again;
}
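
/*
 * Editorial note: the wrap-around turns this into a circular search that
 * starts at 'pri' rather than a strict best-priority search.  The ULE
 * scheduler (an assumption here; see sched_ule.c) relies on this to rotate
 * through its timeshare queues so every occupied level eventually runs.
 */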

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the thread to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct thread *td, int flags)
{
        struct rqhead *rqh;
        int pri;

        pri = td->td_priority / RQ_PPQ;
        td->td_rqindex = pri;
        runq_setbit(rq, pri);
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_add: td=%p pri=%d %d rqh=%p",
            td, td->td_priority, pri, rqh);
        if (flags & SRQ_PREEMPTED) {
                TAILQ_INSERT_HEAD(rqh, td, td_runq);
        } else {
                TAILQ_INSERT_TAIL(rqh, td, td_runq);
        }
}
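
/*
 * Editorial example: with the stock RQ_PPQ of 4 (an assumption, from
 * sys/runq.h), four adjacent priorities share a queue, so td_priority 100
 * maps to queue 100 / 4 == 25.  Note that SRQ_PREEMPTED inserts at the
 * head, letting a preempted thread be the first at its level to resume.
 */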

void
runq_add_pri(struct runq *rq, struct thread *td, u_char pri, int flags)
{
        struct rqhead *rqh;

        KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
        td->td_rqindex = pri;
        runq_setbit(rq, pri);
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_add_pri: td=%p pri=%d idx=%d rqh=%p",
            td, td->td_priority, pri, rqh);
        if (flags & SRQ_PREEMPTED) {
                TAILQ_INSERT_HEAD(rqh, td, td_runq);
        } else {
                TAILQ_INSERT_TAIL(rqh, td, td_runq);
        }
}

/*
 * Return true if there are runnable threads of any priority on the run
 * queue, false otherwise.  Has no side effects and does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
        struct rqbits *rqb;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
                            rqb->rqb_bits[i], i);
                        return (1);
                }
        CTR0(KTR_RUNQ, "runq_check: empty");

        return (0);
}

/*
 * Find the highest priority thread on the run queue.
 */
struct thread *
runq_choose_fuzz(struct runq *rq, int fuzz)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        while ((pri = runq_findbit(rq)) != -1) {
                rqh = &rq->rq_queues[pri];
                /* fuzz == 1 is normal; 0 or less is ignored. */
                if (fuzz > 1) {
                        /*
                         * In the first couple of entries, check if
                         * there is one for our CPU as a preference.
                         */
                        int count = fuzz;
                        int cpu = PCPU_GET(cpuid);
                        struct thread *td2;
                        td2 = td = TAILQ_FIRST(rqh);

                        while (count-- && td2) {
                                if (td2->td_lastcpu == cpu) {
                                        td = td2;
                                        break;
                                }
                                td2 = TAILQ_NEXT(td2, td_runq);
                        }
                } else
                        td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL, ("runq_choose_fuzz: no proc on busy queue"));
                CTR3(KTR_RUNQ,
                    "runq_choose_fuzz: pri=%d thread=%p rqh=%p", pri, td, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose_fuzz: idleproc pri=%d", pri);

        return (NULL);
}
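
/*
 * Editorial note: the fuzz is a cache-affinity heuristic.  Instead of
 * always taking the head of the best non-empty queue, up to 'fuzz' threads
 * at the front are inspected and one that last ran on this CPU is
 * preferred, trading strict FIFO order within a priority level for warmer
 * caches.  The 4BSD scheduler (an assumption here) sizes the window via
 * its kern.sched.runq_fuzz sysctl.
 */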

/*
 * Find the highest priority thread on the run queue.
 */
struct thread *
runq_choose(struct runq *rq)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        while ((pri = runq_findbit(rq)) != -1) {
                rqh = &rq->rq_queues[pri];
                td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
                CTR3(KTR_RUNQ,
                    "runq_choose: pri=%d thread=%p rqh=%p", pri, td, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose: idlethread pri=%d", pri);

        return (NULL);
}

struct thread *
runq_choose_from(struct runq *rq, u_char idx)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        if ((pri = runq_findbit_from(rq, idx)) != -1) {
                rqh = &rq->rq_queues[pri];
                td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL, ("runq_choose_from: no thread on busy queue"));
                CTR4(KTR_RUNQ,
                    "runq_choose_from: pri=%d thread=%p idx=%d rqh=%p",
                    pri, td, td->td_rqindex, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose_from: idlethread pri=%d", pri);

        return (NULL);
}

/*
 * Remove the thread from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set state afterwards.
 */
void
runq_remove(struct runq *rq, struct thread *td)
{

        runq_remove_idx(rq, td, NULL);
}

void
runq_remove_idx(struct runq *rq, struct thread *td, u_char *idx)
{
        struct rqhead *rqh;
        u_char pri;

        KASSERT(td->td_flags & TDF_INMEM,
                ("runq_remove_idx: thread swapped out"));
        pri = td->td_rqindex;
        KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri));
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_remove_idx: td=%p, pri=%d %d rqh=%p",
            td, td->td_priority, pri, rqh);
        TAILQ_REMOVE(rqh, td, td_runq);
        if (TAILQ_EMPTY(rqh)) {
                CTR0(KTR_RUNQ, "runq_remove_idx: empty");
                runq_clrbit(rq, pri);
                if (idx != NULL && *idx == pri)
                        *idx = (pri + 1) % RQ_NQS;
        }
}
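
/*
 * Editorial note: the 'idx' adjustment above keeps a caller-maintained
 * circular-scan position (as used with runq_findbit_from()) from pointing
 * at a queue that just became empty; advancing it modulo RQ_NQS resumes
 * the rotation at the next level.
 */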
