FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_switch.c


/*-
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/10.1/sys/kern/kern_switch.c 244046 2012-12-09 04:54:22Z attilio $");

#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>

/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
#define KTR_CRITICAL    KTR_SCHED
#else
#define KTR_CRITICAL    0
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);

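/*
 * A worked sketch of the bitmap layout (editor's numbers, assuming the
 * stock definitions RQ_NQS == 64 with 32-bit status words, so
 * RQB_BPW == 32, RQB_L2BPW == 5, and RQB_LEN == 2): queue 37 is tracked
 * by bit (37 & 31) == 5 of word (37 >> 5) == 1, i.e.
 *
 *      RQB_WORD(37) == 1
 *      RQB_BIT(37)  == 1 << 5
 *
 * The CTASSERT above checks exactly this invariant: the status words
 * together provide one bit per run queue.
 */
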
/*
 * kern.sched.preemption allows user space to determine if preemption support
 * is compiled in or not.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");

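/*
 * Illustrative sketch, not part of this file: a userland program could
 * read the read-only knob exported above with sysctlbyname(3):
 *
 *      #include <sys/types.h>
 *      #include <sys/sysctl.h>
 *      #include <stdio.h>
 *
 *      int
 *      main(void)
 *      {
 *              int val;
 *              size_t len = sizeof(val);
 *
 *              if (sysctlbyname("kern.sched.preemption", &val, &len,
 *                  NULL, 0) == -1) {
 *                      perror("sysctlbyname");
 *                      return (1);
 *              }
 *              printf("kernel preemption compiled in: %d\n", val);
 *              return (0);
 *      }
 */
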
/*
 * Support for scheduler stats exported via kern.sched.stats.  All stats may
 * be reset with kern.sched.stats.reset = 1.  Stats may be defined elsewhere
 * with SCHED_STAT_DEFINE().
 */
#ifdef SCHED_STATS
SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");

/* Switch reasons from mi_switch(). */
DPCPU_DEFINE(long, sched_switch_stats[SWT_COUNT]);
SCHED_STAT_DEFINE_VAR(uncategorized,
    &DPCPU_NAME(sched_switch_stats[SWT_NONE]), "");
SCHED_STAT_DEFINE_VAR(preempt,
    &DPCPU_NAME(sched_switch_stats[SWT_PREEMPT]), "");
SCHED_STAT_DEFINE_VAR(owepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_OWEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(turnstile,
    &DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), "");
SCHED_STAT_DEFINE_VAR(sleepq,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), "");
SCHED_STAT_DEFINE_VAR(sleepqtimo,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), "");
SCHED_STAT_DEFINE_VAR(relinquish,
    &DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), "");
SCHED_STAT_DEFINE_VAR(needresched,
    &DPCPU_NAME(sched_switch_stats[SWT_NEEDRESCHED]), "");
SCHED_STAT_DEFINE_VAR(idle,
    &DPCPU_NAME(sched_switch_stats[SWT_IDLE]), "");
SCHED_STAT_DEFINE_VAR(iwait,
    &DPCPU_NAME(sched_switch_stats[SWT_IWAIT]), "");
SCHED_STAT_DEFINE_VAR(suspend,
    &DPCPU_NAME(sched_switch_stats[SWT_SUSPEND]), "");
SCHED_STAT_DEFINE_VAR(remotepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(remotewakeidle,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEWAKEIDLE]), "");

static int
sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
{
        struct sysctl_oid *p;
        uintptr_t counter;
        int error;
        int val;
        int i;

        val = 0;
        error = sysctl_handle_int(oidp, &val, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        if (val == 0)
                return (0);
        /*
         * Traverse the list of children of _kern_sched_stats and reset each
         * to 0.  Skip the reset entry.
         */
        SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
                if (p == oidp || p->oid_arg1 == NULL)
                        continue;
                counter = (uintptr_t)p->oid_arg1;
                CPU_FOREACH(i) {
                        *(long *)(dpcpu_off[i] + counter) = 0;
                }
        }
        return (0);
}

SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL,
    0, sysctl_stats_reset, "I", "Reset scheduler statistics");
#endif

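/*
 * Illustrative usage, not part of this file: on a kernel built with
 * options SCHED_STATS, the per-reason switch counters defined above can
 * be inspected and cleared from the shell:
 *
 *      sysctl kern.sched.stats
 *      sysctl kern.sched.stats.reset=1
 */
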
/************************************************************************
 * Functions that manipulate runnability from a thread perspective.     *
 ************************************************************************/
/*
 * Select the thread that will be run next.
 */
struct thread *
choosethread(void)
{
        struct thread *td;

retry:
        td = sched_choose();

        /*
         * If we are panicking, only allow system threads,
         * plus the one we are running in, to be run.
         */
        if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
            (td->td_flags & TDF_INPANIC) == 0)) {
                /* note that it is no longer on the run queue */
                TD_SET_CAN_RUN(td);
                goto retry;
        }

        TD_SET_RUNNING(td);
        return (td);
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemption is not allowed.
 *
 * It might seem a good idea to inline critical_enter(), but in order
 * to prevent instruction reordering by the compiler, a __compiler_membar()
 * would have to be used here (the same as for sched_pin()).  The performance
 * penalty imposed by the membar could then produce slower code than
 * the function call itself, in most cases.
 */
void
critical_enter(void)
{
        struct thread *td;

        td = curthread;
        td->td_critnest++;
        CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

void
critical_exit(void)
{
        struct thread *td;
        int flags;

        td = curthread;
        KASSERT(td->td_critnest != 0,
            ("critical_exit: td_critnest == 0"));

        if (td->td_critnest == 1) {
                td->td_critnest = 0;
                if (td->td_owepreempt && !kdb_active) {
                        td->td_critnest = 1;
                        thread_lock(td);
                        td->td_critnest--;
                        flags = SW_INVOL | SW_PREEMPT;
                        if (TD_IS_IDLETHREAD(td))
                                flags |= SWT_IDLE;
                        else
                                flags |= SWT_OWEPREEMPT;
                        mi_switch(flags, NULL);
                        thread_unlock(td);
                }
        } else
                td->td_critnest--;

        CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

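/*
 * Illustrative usage sketch, not part of this file: code that must not
 * be preempted (and therefore cannot migrate) while touching per-CPU
 * state brackets the access with a critical section:
 *
 *      critical_enter();
 *      cpu = PCPU_GET(cpuid);          -- no preemption from here ...
 *      critical_exit();                -- ... until here
 *
 * Sections nest: td_critnest counts the depth, and a preemption deferred
 * via td_owepreempt is only honored once the outermost critical_exit()
 * drops the count back to zero, as implemented above.
 */
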
/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests                             *
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
        int i;

        bzero(rq, sizeof *rq);
        for (i = 0; i < RQ_NQS; i++)
                TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
        struct rqbits *rqb;
        int pri;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
                        CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
                            rqb->rqb_bits[i], i, pri);
                        return (pri);
                }

        return (-1);
}

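/*
 * Worked example (editor's numbers, again assuming 32-bit status words,
 * so RQB_L2BPW == 5): with rqb_bits[0] == 0 and rqb_bits[1] == 0x104,
 * word 0 is skipped, RQB_FFS(0x104) yields bit 2, and the first
 * non-empty queue is 2 + (1 << 5) == 34.
 */
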
static __inline int
runq_findbit_from(struct runq *rq, u_char pri)
{
        struct rqbits *rqb;
        rqb_word_t mask;
        int i;

        /*
         * Set the mask for the first word so we ignore priorities before 'pri'.
         */
        mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
        rqb = &rq->rq_status;
again:
        for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
                mask = rqb->rqb_bits[i] & mask;
                if (mask == 0)
                        continue;
                pri = RQB_FFS(mask) + (i << RQB_L2BPW);
                CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
                    mask, i, pri);
                return (pri);
        }
        if (pri == 0)
                return (-1);
        /*
         * Wrap back around to the beginning of the list just once so we
         * scan the whole thing.
         */
        pri = 0;
        goto again;
}

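/*
 * Illustrative note, not part of this file: passing a non-zero 'pri'
 * makes the scan circular.  For example, a start index of 40 covers
 * queues 40..RQ_NQS-1 first and then wraps to 0..39, so a thread on
 * queue 3 is only found after the higher-indexed queues come up empty.
 * This lets a caller rotate through the queues fairly (sched_ule.c's
 * timeshare queue uses runq_choose_from() this way).
 */
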
/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the thread to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct thread *td, int flags)
{
        struct rqhead *rqh;
        int pri;

        pri = td->td_priority / RQ_PPQ;
        td->td_rqindex = pri;
        runq_setbit(rq, pri);
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_add: td=%p pri=%d %d rqh=%p",
            td, td->td_priority, pri, rqh);
        if (flags & SRQ_PREEMPTED) {
                TAILQ_INSERT_HEAD(rqh, td, td_runq);
        } else {
                TAILQ_INSERT_TAIL(rqh, td, td_runq);
        }
}

void
runq_add_pri(struct runq *rq, struct thread *td, u_char pri, int flags)
{
        struct rqhead *rqh;

        KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
        td->td_rqindex = pri;
        runq_setbit(rq, pri);
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_add_pri: td=%p pri=%d idx=%d rqh=%p",
            td, td->td_priority, pri, rqh);
        if (flags & SRQ_PREEMPTED) {
                TAILQ_INSERT_HEAD(rqh, td, td_runq);
        } else {
                TAILQ_INSERT_TAIL(rqh, td, td_runq);
        }
}

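/*
 * Worked example (editor's numbers, assuming the stock RQ_NQS == 64 and
 * RQ_PPQ == 4): runq_add() maps a thread with td_priority == 130 to
 * queue 130 / 4 == 32, sharing it with priorities 128..131, while
 * runq_add_pri() lets the caller choose the queue index directly.
 */
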
/*
 * Return true if there are runnable threads of any priority on the run
 * queue, false otherwise.  Has no side effects and does not modify the
 * run queue structure.
 */
int
runq_check(struct runq *rq)
{
        struct rqbits *rqb;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
                            rqb->rqb_bits[i], i);
                        return (1);
                }
        CTR0(KTR_RUNQ, "runq_check: empty");

        return (0);
}

/*
 * Find the highest priority thread on the run queue, preferring, among
 * the first 'fuzz' entries of the best queue, one that last ran on the
 * current CPU.
 */
struct thread *
runq_choose_fuzz(struct runq *rq, int fuzz)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        while ((pri = runq_findbit(rq)) != -1) {
                rqh = &rq->rq_queues[pri];
                /* fuzz == 1 is normal; 0 or less are ignored. */
                if (fuzz > 1) {
                        /*
                         * In the first couple of entries, check if
                         * there is one for our CPU as a preference.
                         */
                        int count = fuzz;
                        int cpu = PCPU_GET(cpuid);
                        struct thread *td2;
                        td2 = td = TAILQ_FIRST(rqh);

                        while (count-- && td2) {
                                if (td2->td_lastcpu == cpu) {
                                        td = td2;
                                        break;
                                }
                                td2 = TAILQ_NEXT(td2, td_runq);
                        }
                } else
                        td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL, ("runq_choose_fuzz: no proc on busy queue"));
                CTR3(KTR_RUNQ,
                    "runq_choose_fuzz: pri=%d thread=%p rqh=%p", pri, td, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose_fuzz: idleproc pri=%d", pri);

        return (NULL);
}

/*
 * Find the highest priority thread on the run queue.
 */
struct thread *
runq_choose(struct runq *rq)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        while ((pri = runq_findbit(rq)) != -1) {
                rqh = &rq->rq_queues[pri];
                td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
                CTR3(KTR_RUNQ,
                    "runq_choose: pri=%d thread=%p rqh=%p", pri, td, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose: idlethread pri=%d", pri);

        return (NULL);
}

struct thread *
runq_choose_from(struct runq *rq, u_char idx)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        if ((pri = runq_findbit_from(rq, idx)) != -1) {
                rqh = &rq->rq_queues[pri];
                td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL, ("runq_choose_from: no thread on busy queue"));
                CTR4(KTR_RUNQ,
                    "runq_choose_from: pri=%d thread=%p idx=%d rqh=%p",
                    pri, td, td->td_rqindex, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose_from: idlethread pri=%d", pri);

        return (NULL);
}

/*
 * Remove the thread from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set state afterwards.
 */
void
runq_remove(struct runq *rq, struct thread *td)
{

        runq_remove_idx(rq, td, NULL);
}

void
runq_remove_idx(struct runq *rq, struct thread *td, u_char *idx)
{
        struct rqhead *rqh;
        u_char pri;

        KASSERT(td->td_flags & TDF_INMEM,
                ("runq_remove_idx: thread swapped out"));
        pri = td->td_rqindex;
        KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri));
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_remove_idx: td=%p, pri=%d %d rqh=%p",
            td, td->td_priority, pri, rqh);
        TAILQ_REMOVE(rqh, td, td_runq);
        if (TAILQ_EMPTY(rqh)) {
                CTR0(KTR_RUNQ, "runq_remove_idx: empty");
                runq_clrbit(rq, pri);
                if (idx != NULL && *idx == pri)
                        *idx = (pri + 1) % RQ_NQS;
        }
}

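/*
 * The idx out-parameter supports circular consumers: a caller keeps its
 * scan index in *idx, and when the queue at that index drains, the
 * index is advanced to the following queue (modulo RQ_NQS) so that a
 * subsequent runq_findbit_from() does not restart on a queue known to
 * be empty.
 */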
