FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_switch.c

/*-
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/8.4/sys/kern/kern_switch.c 230176 2012-01-15 22:23:41Z avg $");

#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>

/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
#define KTR_CRITICAL    KTR_SCHED
#else
#define KTR_CRITICAL    0
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

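/*
 * Editor's illustration (not part of the original file): the CTASSERT
 * above checks that the per-queue status bits exactly tile the word
 * array.  Under the stock <sys/runq.h> definitions, RQB_WORD() and
 * RQB_BIT() decompose a queue index into a word and a bit within it,
 * equivalent to the hypothetical helpers below.
 */
#if 0
static __inline int
example_word_of(int pri)
{
        return (pri >> RQB_L2BPW);              /* same as RQB_WORD(pri) */
}

static __inline rqb_word_t
example_bit_of(int pri)
{
        /* same value as RQB_BIT(pri) */
        return ((rqb_word_t)1 << (pri & (RQB_BPW - 1)));
}
#endif
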
/*
 * kern.sched.preemption allows user space to determine if preemption support
 * is compiled in or not.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");

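/*
 * Editor's illustration (not part of the original file): a user space
 * program can read the flag back with sysctlbyname(3), e.g.:
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        int val;
        size_t len = sizeof(val);

        if (sysctlbyname("kern.sched.preemption", &val, &len, NULL, 0) == 0)
                printf("kernel preemption compiled in: %d\n", val);
        return (0);
}
#endif
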
/*
 * Support for scheduler stats exported via kern.sched.stats.  All stats may
 * be reset with kern.sched.stats.reset = 1.  Stats may be defined elsewhere
 * with SCHED_STAT_DEFINE().
 */
#ifdef SCHED_STATS
SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");

/* Switch reasons from mi_switch(). */
DPCPU_DEFINE(long, sched_switch_stats[SWT_COUNT]);
SCHED_STAT_DEFINE_VAR(uncategorized,
    &DPCPU_NAME(sched_switch_stats[SWT_NONE]), "");
SCHED_STAT_DEFINE_VAR(preempt,
    &DPCPU_NAME(sched_switch_stats[SWT_PREEMPT]), "");
SCHED_STAT_DEFINE_VAR(owepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_OWEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(turnstile,
    &DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), "");
SCHED_STAT_DEFINE_VAR(sleepq,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), "");
SCHED_STAT_DEFINE_VAR(sleepqtimo,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), "");
SCHED_STAT_DEFINE_VAR(relinquish,
    &DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), "");
SCHED_STAT_DEFINE_VAR(needresched,
    &DPCPU_NAME(sched_switch_stats[SWT_NEEDRESCHED]), "");
SCHED_STAT_DEFINE_VAR(idle,
    &DPCPU_NAME(sched_switch_stats[SWT_IDLE]), "");
SCHED_STAT_DEFINE_VAR(iwait,
    &DPCPU_NAME(sched_switch_stats[SWT_IWAIT]), "");
SCHED_STAT_DEFINE_VAR(suspend,
    &DPCPU_NAME(sched_switch_stats[SWT_SUSPEND]), "");
SCHED_STAT_DEFINE_VAR(remotepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(remotewakeidle,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEWAKEIDLE]), "");

static int
sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
{
        struct sysctl_oid *p;
        uintptr_t counter;
        int error;
        int val;
        int i;

        val = 0;
        error = sysctl_handle_int(oidp, &val, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        if (val == 0)
                return (0);
        /*
         * Traverse the list of children of _kern_sched_stats and reset each
         * to 0.  Skip the reset entry.
         */
        SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
                if (p == oidp || p->oid_arg1 == NULL)
                        continue;
                counter = (uintptr_t)p->oid_arg1;
                CPU_FOREACH(i) {
                        *(long *)(dpcpu_off[i] + counter) = 0;
                }
        }
        return (0);
}

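/*
 * Editor's illustration (not part of the original file): each counter's
 * oid_arg1 holds the counter's offset within the DPCPU template, so the
 * live copy for CPU i sits at dpcpu_off[i] + offset.  The same
 * arithmetic could aggregate a counter across CPUs:
 */
#if 0
static long
example_stat_sum(uintptr_t counter)
{
        long total = 0;
        int i;

        CPU_FOREACH(i)
                total += *(long *)(dpcpu_off[i] + counter);
        return (total);
}
#endif
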
SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL,
    0, sysctl_stats_reset, "I", "Reset scheduler statistics");
#endif

/************************************************************************
 * Functions that manipulate runnability from a thread perspective.     *
 ************************************************************************/
/*
 * Select the thread that will be run next.
 */
struct thread *
choosethread(void)
{
        struct thread *td;

retry:
        td = sched_choose();

        /*
         * If we are in panic, only allow system threads,
         * plus the one we are running in, to be run.
         */
        if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
            (td->td_flags & TDF_INPANIC) == 0)) {
                /* note that it is no longer on the run queue */
                TD_SET_CAN_RUN(td);
                goto retry;
        }

        TD_SET_RUNNING(td);
        return (td);
}

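/*
 * Editor's note (not part of the original file): during a panic, any
 * non-system thread popped off the run queue is parked via
 * TD_SET_CAN_RUN() rather than run, so the retry loop spins until
 * sched_choose() yields a system thread or the panicking thread itself.
 */
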
/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 */
void
critical_enter(void)
{
        struct thread *td;

        td = curthread;
        td->td_critnest++;
        CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

void
critical_exit(void)
{
        struct thread *td;
        int flags;

        td = curthread;
        KASSERT(td->td_critnest != 0,
            ("critical_exit: td_critnest == 0"));

        if (td->td_critnest == 1) {
                td->td_critnest = 0;
                if (td->td_owepreempt && !kdb_active) {
                        td->td_critnest = 1;
                        thread_lock(td);
                        td->td_critnest--;
                        flags = SW_INVOL | SW_PREEMPT;
                        if (TD_IS_IDLETHREAD(td))
                                flags |= SWT_IDLE;
                        else
                                flags |= SWT_OWEPREEMPT;
                        mi_switch(flags, NULL);
                        thread_unlock(td);
                }
        } else
                td->td_critnest--;

        CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

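/*
 * Editor's illustration (not part of the original file): the canonical
 * pairing.  Sections nest, and a preemption requested while td_critnest
 * is nonzero (td_owepreempt) is delivered by the outermost
 * critical_exit().  The per-CPU field below is hypothetical.
 */
#if 0
static void
example_percpu_bump(void)
{
        critical_enter();       /* defer preemption; stay on this CPU */
        PCPU_SET(ex_counter, PCPU_GET(ex_counter) + 1); /* hypothetical */
        critical_exit();        /* may switch here if one was owed */
}
#endif
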
/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests                             *
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
        int i;

        bzero(rq, sizeof *rq);
        for (i = 0; i < RQ_NQS; i++)
                TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits, a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
        struct rqbits *rqb;
        int pri;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
                        CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
                            rqb->rqb_bits[i], i, pri);
                        return (pri);
                }

        return (-1);
}

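/*
 * Editor's worked example (not part of the original file), assuming the
 * stock RQB_FFS(word) == ffs(word) - 1 from <sys/runq.h>:
 *
 *     rqb_bits[0] == 0x0000 -> skipped, no queue in this word is occupied
 *     rqb_bits[1] == 0x0030 -> bits 4 and 5 set; find-first-set picks
 *                              bit 4, so pri = 4 + (1 << RQB_L2BPW),
 *                              the lowest-numbered (best-priority)
 *                              non-empty queue.
 */
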
static __inline int
runq_findbit_from(struct runq *rq, u_char pri)
{
        struct rqbits *rqb;
        rqb_word_t mask;
        int i;

        /*
         * Set the mask for the first word so we ignore priorities before 'pri'.
         */
        mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
        rqb = &rq->rq_status;
again:
        for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
                mask = rqb->rqb_bits[i] & mask;
                if (mask == 0)
                        continue;
                pri = RQB_FFS(mask) + (i << RQB_L2BPW);
                CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
                    mask, i, pri);
                return (pri);
        }
        if (pri == 0)
                return (-1);
        /*
         * Wrap back around to the beginning of the list just once so we
         * scan the whole thing.
         */
        pri = 0;
        goto again;
}

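/*
 * Editor's note (not part of the original file): the first word is
 * masked so bits below 'pri' are ignored; the 'mask = -1' in the loop
 * update then exposes every later word in full.  If nothing is found
 * from 'pri' upward, the scan restarts once from 0 to cover the queues
 * skipped by the initial mask, which suits a caller maintaining a
 * circular scan position.
 */
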
/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the thread to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct thread *td, int flags)
{
        struct rqhead *rqh;
        int pri;

        pri = td->td_priority / RQ_PPQ;
        td->td_rqindex = pri;
        runq_setbit(rq, pri);
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_add: td=%p pri=%d %d rqh=%p",
            td, td->td_priority, pri, rqh);
        if (flags & SRQ_PREEMPTED) {
                TAILQ_INSERT_HEAD(rqh, td, td_runq);
        } else {
                TAILQ_INSERT_TAIL(rqh, td, td_runq);
        }
}

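/*
 * Editor's worked example (not part of the original file), assuming the
 * stock RQ_PPQ == 4 and RQ_NQS == 64 from <sys/runq.h>: td_priority
 * values 0-3 share queue 0, 4-7 share queue 1, and so on, folding the
 * 256 thread priorities into 64 queues.  SRQ_PREEMPTED inserts at the
 * head so a preempted thread resumes ahead of its equal-priority peers.
 */
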
void
runq_add_pri(struct runq *rq, struct thread *td, u_char pri, int flags)
{
        struct rqhead *rqh;

        KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
        td->td_rqindex = pri;
        runq_setbit(rq, pri);
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_add_pri: td=%p pri=%d idx=%d rqh=%p",
            td, td->td_priority, pri, rqh);
        if (flags & SRQ_PREEMPTED) {
                TAILQ_INSERT_HEAD(rqh, td, td_runq);
        } else {
                TAILQ_INSERT_TAIL(rqh, td, td_runq);
        }
}

/*
 * Return true if there are runnable threads of any priority on the run
 * queue, false otherwise.  Has no side effects, does not modify the run
 * queue structure.
 */
int
runq_check(struct runq *rq)
{
        struct rqbits *rqb;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
                            rqb->rqb_bits[i], i);
                        return (1);
                }
        CTR0(KTR_RUNQ, "runq_check: empty");

        return (0);
}

/*
 * Find the highest priority thread on the run queue, preferring, within
 * the first 'fuzz' entries of the best queue, one that last ran on the
 * current CPU.
 */
struct thread *
runq_choose_fuzz(struct runq *rq, int fuzz)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        while ((pri = runq_findbit(rq)) != -1) {
                rqh = &rq->rq_queues[pri];
                /* fuzz == 1 is normal.. 0 or less are ignored */
                if (fuzz > 1) {
                        /*
                         * In the first couple of entries, check if
                         * there is one for our CPU as a preference.
                         */
                        int count = fuzz;
                        int cpu = PCPU_GET(cpuid);
                        struct thread *td2;
                        td2 = td = TAILQ_FIRST(rqh);

                        while (count-- && td2) {
                                if (td2->td_lastcpu == cpu) {
                                        td = td2;
                                        break;
                                }
                                td2 = TAILQ_NEXT(td2, td_runq);
                        }
                } else
                        td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL, ("runq_choose_fuzz: no proc on busy queue"));
                CTR3(KTR_RUNQ,
                    "runq_choose_fuzz: pri=%d thread=%p rqh=%p", pri, td, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose_fuzz: idleproc pri=%d", pri);

        return (NULL);
}

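/*
 * Editor's note (not part of the original file): with fuzz > 1 the first
 * few equal-priority threads are inspected and one whose td_lastcpu
 * matches the current CPU is preferred, trading strict FIFO order within
 * a queue for warmer caches.  The 4BSD scheduler drives this through its
 * kern.sched.runq_fuzz knob; fuzz <= 1 behaves like plain runq_choose().
 */
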
/*
 * Find the highest priority thread on the run queue.
 */
struct thread *
runq_choose(struct runq *rq)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        while ((pri = runq_findbit(rq)) != -1) {
                rqh = &rq->rq_queues[pri];
                td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
                CTR3(KTR_RUNQ,
                    "runq_choose: pri=%d thread=%p rqh=%p", pri, td, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose: idlethread pri=%d", pri);

        return (NULL);
}

struct thread *
runq_choose_from(struct runq *rq, u_char idx)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        if ((pri = runq_findbit_from(rq, idx)) != -1) {
                rqh = &rq->rq_queues[pri];
                td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL,
                    ("runq_choose_from: no thread on busy queue"));
                CTR4(KTR_RUNQ,
                    "runq_choose_from: pri=%d thread=%p idx=%d rqh=%p",
                    pri, td, td->td_rqindex, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose_from: idlethread pri=%d", pri);

        return (NULL);
}

/*
 * Remove the thread from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set state afterwards.
 */
void
runq_remove(struct runq *rq, struct thread *td)
{

        runq_remove_idx(rq, td, NULL);
}

void
runq_remove_idx(struct runq *rq, struct thread *td, u_char *idx)
{
        struct rqhead *rqh;
        u_char pri;

        KASSERT(td->td_flags & TDF_INMEM,
                ("runq_remove_idx: thread swapped out"));
        pri = td->td_rqindex;
        KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri));
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_remove_idx: td=%p, pri=%d %d rqh=%p",
            td, td->td_priority, pri, rqh);
        TAILQ_REMOVE(rqh, td, td_runq);
        if (TAILQ_EMPTY(rqh)) {
                CTR0(KTR_RUNQ, "runq_remove_idx: empty");
                runq_clrbit(rq, pri);
                if (idx != NULL && *idx == pri)
                        *idx = (pri + 1) % RQ_NQS;
        }
}
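
/*
 * Editor's note (not part of the original file): the 'idx' adjustment
 * keeps a caller-maintained circular scan position (as fed back into
 * runq_findbit_from() above) from pointing at a queue that just went
 * empty, advancing it to the next slot instead.
 */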
