The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_synch.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: kern_synch.c,v 1.148.2.1 2005/10/21 17:39:40 riz Exp $ */
    2 
    3 /*-
    4  * Copyright (c) 1999, 2000, 2004 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
    9  * NASA Ames Research Center.
   10  * This code is derived from software contributed to The NetBSD Foundation
   11  * by Charles M. Hannum.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. All advertising materials mentioning features or use of this software
   22  *    must display the following acknowledgement:
   23  *      This product includes software developed by the NetBSD
   24  *      Foundation, Inc. and its contributors.
   25  * 4. Neither the name of The NetBSD Foundation nor the names of its
   26  *    contributors may be used to endorse or promote products derived
   27  *    from this software without specific prior written permission.
   28  *
   29  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   30  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   31  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   32  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   33  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   34  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   35  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   36  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   37  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   38  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   39  * POSSIBILITY OF SUCH DAMAGE.
   40  */
   41 
   42 /*-
   43  * Copyright (c) 1982, 1986, 1990, 1991, 1993
   44  *      The Regents of the University of California.  All rights reserved.
   45  * (c) UNIX System Laboratories, Inc.
   46  * All or some portions of this file are derived from material licensed
   47  * to the University of California by American Telephone and Telegraph
   48  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   49  * the permission of UNIX System Laboratories, Inc.
   50  *
   51  * Redistribution and use in source and binary forms, with or without
   52  * modification, are permitted provided that the following conditions
   53  * are met:
   54  * 1. Redistributions of source code must retain the above copyright
   55  *    notice, this list of conditions and the following disclaimer.
   56  * 2. Redistributions in binary form must reproduce the above copyright
   57  *    notice, this list of conditions and the following disclaimer in the
   58  *    documentation and/or other materials provided with the distribution.
   59  * 3. Neither the name of the University nor the names of its contributors
   60  *    may be used to endorse or promote products derived from this software
   61  *    without specific prior written permission.
   62  *
   63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   73  * SUCH DAMAGE.
   74  *
   75  *      @(#)kern_synch.c        8.9 (Berkeley) 5/19/95
   76  */
   77 
   78 #include <sys/cdefs.h>
   79 __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.148.2.1 2005/10/21 17:39:40 riz Exp $");
   80 
   81 #include "opt_ddb.h"
   82 #include "opt_ktrace.h"
   83 #include "opt_kstack.h"
   84 #include "opt_lockdebug.h"
   85 #include "opt_multiprocessor.h"
   86 #include "opt_perfctrs.h"
   87 
   88 #include <sys/param.h>
   89 #include <sys/systm.h>
   90 #include <sys/callout.h>
   91 #include <sys/proc.h>
   92 #include <sys/kernel.h>
   93 #include <sys/buf.h>
   94 #if defined(PERFCTRS)
   95 #include <sys/pmc.h>
   96 #endif
   97 #include <sys/signalvar.h>
   98 #include <sys/resourcevar.h>
   99 #include <sys/sched.h>
  100 #include <sys/sa.h>
  101 #include <sys/savar.h>
  102 
  103 #include <uvm/uvm_extern.h>
  104 
  105 #ifdef KTRACE
  106 #include <sys/ktrace.h>
  107 #endif
  108 
  109 #include <machine/cpu.h>
  110 
  111 int     lbolt;                  /* once a second sleep address */
  112 int     rrticks;                /* number of hardclock ticks per roundrobin() */
  113 
  114 /*
  115  * The global scheduler state.
  116  */
  117 struct prochd sched_qs[RUNQUE_NQS];     /* run queues */
  118 __volatile u_int32_t sched_whichqs;     /* bitmap of non-empty queues */
  119 struct slpque sched_slpque[SLPQUE_TABLESIZE]; /* sleep queues */
  120 
  121 struct simplelock sched_lock = SIMPLELOCK_INITIALIZER;
  122 
  123 void schedcpu(void *);
  124 void updatepri(struct lwp *);
  125 void endtsleep(void *);
  126 
  127 __inline void sa_awaken(struct lwp *);
  128 __inline void awaken(struct lwp *);
  129 
  130 struct callout schedcpu_ch = CALLOUT_INITIALIZER_SETFUNC(schedcpu, NULL);
  131 
  132 
  133 
  134 /*
  135  * Force switch among equal priority processes every 100ms.
  136  * Called from hardclock every hz/10 == rrticks hardclock ticks.
  137  */
  138 /* ARGSUSED */
  139 void
  140 roundrobin(struct cpu_info *ci)
  141 {
  142         struct schedstate_percpu *spc = &ci->ci_schedstate;
  143 
  144         spc->spc_rrticks = rrticks;
  145 
  146         if (curlwp != NULL) {
  147                 if (spc->spc_flags & SPCF_SEENRR) {
  148                         /*
  149                          * The process has already been through a roundrobin
  150                          * without switching and may be hogging the CPU.
  151                          * Indicate that the process should yield.
  152                          */
  153                         spc->spc_flags |= SPCF_SHOULDYIELD;
  154                 } else
  155                         spc->spc_flags |= SPCF_SEENRR;
  156         }
  157         need_resched(curcpu());
  158 }
  159 
  160 /*
  161  * Constants for digital decay and forget:
  162  *      90% of (p_estcpu) usage in 5 * loadav time
  163  *      95% of (p_pctcpu) usage in 60 seconds (load insensitive)
  164  *          Note that, as ps(1) mentions, this can let percentages
  165  *          total over 100% (I've seen 137.9% for 3 processes).
  166  *
  167  * Note that hardclock updates p_estcpu and p_cpticks independently.
  168  *
  169  * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
  170  * That is, the system wants to compute a value of decay such
  171  * that the following for loop:
  172  *      for (i = 0; i < (5 * loadavg); i++)
  173  *              p_estcpu *= decay;
  174  * will compute
  175  *      p_estcpu *= 0.1;
  176  * for all values of loadavg:
  177  *
  178  * Mathematically this loop can be expressed by saying:
  179  *      decay ** (5 * loadavg) ~= .1
  180  *
  181  * The system computes decay as:
  182  *      decay = (2 * loadavg) / (2 * loadavg + 1)
  183  *
  184  * We wish to prove that the system's computation of decay
  185  * will always fulfill the equation:
  186  *      decay ** (5 * loadavg) ~= .1
  187  *
  188  * If we compute b as:
  189  *      b = 2 * loadavg
  190  * then
  191  *      decay = b / (b + 1)
  192  *
  193  * We now need to prove two things:
  194  *      1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
  195  *      2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
  196  *
  197  * Facts:
  198  *         For x close to zero, exp(x) =~ 1 + x, since
  199  *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
  200  *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
  201  *         For x close to zero, ln(1+x) =~ x, since
  202  *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
  203  *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
  204  *         ln(.1) =~ -2.30
  205  *
  206  * Proof of (1):
  207  *    Solve (factor)**(power) =~ .1 given power (5*loadav):
  208  *      solving for factor,
  209  *      ln(factor) =~ (-2.30/5*loadav), or
  210  *      factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
  211  *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
  212  *
  213  * Proof of (2):
  214  *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
  215  *      solving for power,
  216  *      power*ln(b/(b+1)) =~ -2.30, or
  217  *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
  218  *
  219  * Actual power values for the implemented algorithm are as follows:
  220  *      loadav: 1       2       3       4
  221  *      power:  5.68    10.32   14.94   19.55
  222  */
  223 
  224 /* calculations for digital decay to forget 90% of usage in 5*loadav sec */
  225 #define loadfactor(loadav)      (2 * (loadav))
  226 #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))
  227 
  228 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
  229 fixpt_t ccpu = 0.95122942450071400909 * FSCALE;         /* exp(-1/20) */
  230 
  231 /*
  232  * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
  233  * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
  234  * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
  235  *
  236  * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
  237  *      1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
  238  *
  239  * If you dont want to bother with the faster/more-accurate formula, you
  240  * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
  241  * (more general) method of calculating the %age of CPU used by a process.
  242  */
  243 #define CCPU_SHIFT      11
  244 
  245 /*
  246  * Recompute process priorities, every hz ticks.
  247  */
  248 /* ARGSUSED */
  249 void
  250 schedcpu(void *arg)
  251 {
  252         fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
  253         struct lwp *l;
  254         struct proc *p;
  255         int s, minslp;
  256         unsigned int newcpu;
  257         int clkhz;
  258 
  259         proclist_lock_read();
  260         PROCLIST_FOREACH(p, &allproc) {
  261                 /*
  262                  * Increment time in/out of memory and sleep time
  263                  * (if sleeping).  We ignore overflow; with 16-bit int's
  264                  * (remember them?) overflow takes 45 days.
  265                  */
  266                 minslp = 2;
  267                 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
  268                         l->l_swtime++;
  269                         if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
  270                             l->l_stat == LSSUSPENDED) {
  271                                 l->l_slptime++;
  272                                 minslp = min(minslp, l->l_slptime);
  273                         } else
  274                                 minslp = 0;
  275                 }
  276                 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
  277                 /*
  278                  * If the process has slept the entire second,
  279                  * stop recalculating its priority until it wakes up.
  280                  */
  281                 if (minslp > 1)
  282                         continue;
  283                 s = splstatclock();     /* prevent state changes */
  284                 /*
  285                  * p_pctcpu is only for ps.
  286                  */
  287                 clkhz = stathz != 0 ? stathz : hz;
  288 #if     (FSHIFT >= CCPU_SHIFT)
  289                 p->p_pctcpu += (clkhz == 100)?
  290                         ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
  291                         100 * (((fixpt_t) p->p_cpticks)
  292                                 << (FSHIFT - CCPU_SHIFT)) / clkhz;
  293 #else
  294                 p->p_pctcpu += ((FSCALE - ccpu) *
  295                         (p->p_cpticks * FSCALE / clkhz)) >> FSHIFT;
  296 #endif
  297                 p->p_cpticks = 0;
  298                 newcpu = (u_int)decay_cpu(loadfac, p->p_estcpu);
  299                 p->p_estcpu = newcpu;
  300                 splx(s);        /* Done with the process CPU ticks update */
  301                 SCHED_LOCK(s);
  302                 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
  303                         if (l->l_slptime > 1)
  304                                 continue;
  305                         resetpriority(l);
  306                         if (l->l_priority >= PUSER) {
  307                                 if (l->l_stat == LSRUN &&
  308                                     (l->l_flag & L_INMEM) &&
  309                                     (l->l_priority / PPQ) != (l->l_usrpri / PPQ)) {
  310                                         remrunqueue(l);
  311                                         l->l_priority = l->l_usrpri;
  312                                         setrunqueue(l);
  313                                 } else
  314                                         l->l_priority = l->l_usrpri;
  315                         }
  316                 }
  317                 SCHED_UNLOCK(s);
  318         }
  319         proclist_unlock_read();
  320         uvm_meter();
  321         wakeup((caddr_t)&lbolt);
  322         callout_schedule(&schedcpu_ch, hz);
  323 }
  324 
  325 /*
  326  * Recalculate the priority of a process after it has slept for a while.
  327  * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
  328  * least six times the loadfactor will decay p_estcpu to zero.
  329  */
  330 void
  331 updatepri(struct lwp *l)
  332 {
  333         struct proc *p = l->l_proc;
  334         unsigned int newcpu;
  335         fixpt_t loadfac;
  336 
  337         SCHED_ASSERT_LOCKED();
  338 
  339         newcpu = p->p_estcpu;
  340         loadfac = loadfactor(averunnable.ldavg[0]);
  341 
  342         if (l->l_slptime > 5 * loadfac)
  343                 p->p_estcpu = 0; /* XXX NJWLWP */
  344         else {
  345                 l->l_slptime--; /* the first time was done in schedcpu */
  346                 while (newcpu && --l->l_slptime)
  347                         newcpu = (int) decay_cpu(loadfac, newcpu);
  348                 p->p_estcpu = newcpu;
  349         }
  350         resetpriority(l);
  351 }
  352 
  353 /*
  354  * During autoconfiguration or after a panic, a sleep will simply
  355  * lower the priority briefly to allow interrupts, then return.
  356  * The priority to be used (safepri) is machine-dependent, thus this
  357  * value is initialized and maintained in the machine-dependent layers.
  358  * This priority will typically be 0, or the lowest priority
  359  * that is safe for use on the interrupt stack; it can be made
  360  * higher to block network software interrupts after panics.
  361  */
  362 int safepri;
  363 
  364 /*
  365  * General sleep call.  Suspends the current process until a wakeup is
  366  * performed on the specified identifier.  The process will then be made
  367  * runnable with the specified priority.  Sleeps at most timo/hz seconds
  368  * (0 means no timeout).  If pri includes PCATCH flag, signals are checked
  369  * before and after sleeping, else signals are not checked.  Returns 0 if
  370  * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
  371  * signal needs to be delivered, ERESTART is returned if the current system
  372  * call should be restarted if possible, and EINTR is returned if the system
  373  * call should be interrupted by the signal (return EINTR).
  374  *
  375  * The interlock is held until the scheduler_slock is acquired.  The
  376  * interlock will be locked before returning back to the caller
  377  * unless the PNORELOCK flag is specified, in which case the
  378  * interlock will always be unlocked upon return.
  379  */
  380 int
  381 ltsleep(const void *ident, int priority, const char *wmesg, int timo,
  382     __volatile struct simplelock *interlock)
  383 {
  384         struct lwp *l = curlwp;
  385         struct proc *p = l ? l->l_proc : NULL;
  386         struct slpque *qp;
  387         struct sadata_upcall *sau;
  388         int sig, s;
  389         int catch = priority & PCATCH;
  390         int relock = (priority & PNORELOCK) == 0;
  391         int exiterr = (priority & PNOEXITERR) == 0;
  392 
  393         /*
  394          * XXXSMP
  395          * This is probably bogus.  Figure out what the right
  396          * thing to do here really is.
  397          * Note that not sleeping if ltsleep is called with curlwp == NULL
  398          * in the shutdown case is disgusting but partly necessary given
  399          * how shutdown (barely) works.
  400          */
  401         if (cold || (doing_shutdown && (panicstr || (l == NULL)))) {
  402                 /*
  403                  * After a panic, or during autoconfiguration,
  404                  * just give interrupts a chance, then just return;
  405                  * don't run any other procs or panic below,
  406                  * in case this is the idle process and already asleep.
  407                  */
  408                 s = splhigh();
  409                 splx(safepri);
  410                 splx(s);
  411                 if (interlock != NULL && relock == 0)
  412                         simple_unlock(interlock);
  413                 return (0);
  414         }
  415 
  416         KASSERT(p != NULL);
  417         LOCK_ASSERT(interlock == NULL || simple_lock_held(interlock));
  418 
  419 #ifdef KTRACE
  420         if (KTRPOINT(p, KTR_CSW))
  421                 ktrcsw(p, 1, 0);
  422 #endif
  423 
  424         /*
  425          * XXX We need to allocate the sadata_upcall structure here,
  426          * XXX since we can't sleep while waiting for memory inside
  427          * XXX sa_upcall().  It would be nice if we could safely
  428          * XXX allocate the sadata_upcall structure on the stack, here.
  429          */
  430         if (l->l_flag & L_SA) {
  431                 sau = sadata_upcall_alloc(0);
  432         } else {
  433                 sau = NULL;
  434         }
  435 
  436         SCHED_LOCK(s);
  437 
  438 #ifdef DIAGNOSTIC
  439         if (ident == NULL)
  440                 panic("ltsleep: ident == NULL");
  441         if (l->l_stat != LSONPROC)
  442                 panic("ltsleep: l_stat %d != LSONPROC", l->l_stat);
  443         if (l->l_back != NULL)
  444                 panic("ltsleep: p_back != NULL");
  445 #endif
  446 
  447         l->l_wchan = ident;
  448         l->l_wmesg = wmesg;
  449         l->l_slptime = 0;
  450         l->l_priority = priority & PRIMASK;
  451 
  452         qp = SLPQUE(ident);
  453         if (qp->sq_head == 0)
  454                 qp->sq_head = l;
  455         else {
  456                 *qp->sq_tailp = l;
  457         }
  458         *(qp->sq_tailp = &l->l_forw) = 0;
  459 
  460         if (timo)
  461                 callout_reset(&l->l_tsleep_ch, timo, endtsleep, l);
  462 
  463         /*
  464          * We can now release the interlock; the scheduler_slock
  465          * is held, so a thread can't get in to do wakeup() before
  466          * we do the switch.
  467          *
  468          * XXX We leave the code block here, after inserting ourselves
  469          * on the sleep queue, because we might want a more clever
  470          * data structure for the sleep queues at some point.
  471          */
  472         if (interlock != NULL)
  473                 simple_unlock(interlock);
  474 
  475         /*
  476          * We put ourselves on the sleep queue and start our timeout
  477          * before calling CURSIG, as we could stop there, and a wakeup
  478          * or a SIGCONT (or both) could occur while we were stopped.
  479          * A SIGCONT would cause us to be marked as SSLEEP
  480          * without resuming us, thus we must be ready for sleep
  481          * when CURSIG is called.  If the wakeup happens while we're
  482          * stopped, p->p_wchan will be 0 upon return from CURSIG.
  483          */
  484         if (catch) {
  485                 l->l_flag |= L_SINTR;
  486                 if (((sig = CURSIG(l)) != 0) ||
  487                     ((p->p_flag & P_WEXIT) && p->p_nlwps > 1)) {
  488                         if (l->l_wchan != NULL)
  489                                 unsleep(l);
  490                         l->l_stat = LSONPROC;
  491                         SCHED_UNLOCK(s);
  492                         goto resume;
  493                 }
  494                 if (l->l_wchan == NULL) {
  495                         catch = 0;
  496                         SCHED_UNLOCK(s);
  497                         goto resume;
  498                 }
  499         } else
  500                 sig = 0;
  501         l->l_stat = LSSLEEP;
  502         p->p_nrlwps--;
  503         p->p_stats->p_ru.ru_nvcsw++;
  504         SCHED_ASSERT_LOCKED();
  505         if (l->l_flag & L_SA)
  506                 sa_switch(l, sau, SA_UPCALL_BLOCKED);
  507         else
  508                 mi_switch(l, NULL);
  509 
  510 #if     defined(DDB) && !defined(GPROF)
  511         /* handy breakpoint location after process "wakes" */
  512         __asm(".globl bpendtsleep\nbpendtsleep:");
  513 #endif
  514         /*
  515          * p->p_nrlwps is incremented by whoever made us runnable again,
  516          * either setrunnable() or awaken().
  517          */
  518 
  519         SCHED_ASSERT_UNLOCKED();
  520         splx(s);
  521 
  522  resume:
  523         KDASSERT(l->l_cpu != NULL);
  524         KDASSERT(l->l_cpu == curcpu());
  525         l->l_cpu->ci_schedstate.spc_curpriority = l->l_usrpri;
  526 
  527         l->l_flag &= ~L_SINTR;
  528         if (l->l_flag & L_TIMEOUT) {
  529                 l->l_flag &= ~(L_TIMEOUT|L_CANCELLED);
  530                 if (sig == 0) {
  531 #ifdef KTRACE
  532                         if (KTRPOINT(p, KTR_CSW))
  533                                 ktrcsw(p, 0, 0);
  534 #endif
  535                         if (relock && interlock != NULL)
  536                                 simple_lock(interlock);
  537                         return (EWOULDBLOCK);
  538                 }
  539         } else if (timo)
  540                 callout_stop(&l->l_tsleep_ch);
  541 
  542         if (catch) {
  543                 const int cancelled = l->l_flag & L_CANCELLED;
  544                 l->l_flag &= ~L_CANCELLED;
  545                 if (sig != 0 || (sig = CURSIG(l)) != 0 || cancelled) {
  546 #ifdef KTRACE
  547                         if (KTRPOINT(p, KTR_CSW))
  548                                 ktrcsw(p, 0, 0);
  549 #endif
  550                         if (relock && interlock != NULL)
  551                                 simple_lock(interlock);
  552                         /*
  553                          * If this sleep was canceled, don't let the syscall
  554                          * restart.
  555                          */
  556                         if (cancelled ||
  557                             (SIGACTION(p, sig).sa_flags & SA_RESTART) == 0)
  558                                 return (EINTR);
  559                         return (ERESTART);
  560                 }
  561         }
  562 
  563 #ifdef KTRACE
  564         if (KTRPOINT(p, KTR_CSW))
  565                 ktrcsw(p, 0, 0);
  566 #endif
  567         if (relock && interlock != NULL)
  568                 simple_lock(interlock);
  569 
  570         /* XXXNJW this is very much a kluge.
  571          * revisit. a better way of preventing looping/hanging syscalls like
  572          * wait4() and _lwp_wait() from wedging an exiting process
  573          * would be preferred.
  574          */
  575         if (catch && ((p->p_flag & P_WEXIT) && p->p_nlwps > 1 && exiterr))
  576                 return (EINTR);
  577         return (0);
  578 }
  579 
  580 /*
  581  * Implement timeout for tsleep.
  582  * If process hasn't been awakened (wchan non-zero),
  583  * set timeout flag and undo the sleep.  If proc
  584  * is stopped, just unsleep so it will remain stopped.
  585  */
  586 void
  587 endtsleep(void *arg)
  588 {
  589         struct lwp *l;
  590         int s;
  591 
  592         l = (struct lwp *)arg;
  593         SCHED_LOCK(s);
  594         if (l->l_wchan) {
  595                 if (l->l_stat == LSSLEEP)
  596                         setrunnable(l);
  597                 else
  598                         unsleep(l);
  599                 l->l_flag |= L_TIMEOUT;
  600         }
  601         SCHED_UNLOCK(s);
  602 }
  603 
  604 /*
  605  * Remove a process from its wait queue
  606  */
  607 void
  608 unsleep(struct lwp *l)
  609 {
  610         struct slpque *qp;
  611         struct lwp **hp;
  612 
  613         SCHED_ASSERT_LOCKED();
  614 
  615         if (l->l_wchan) {
  616                 hp = &(qp = SLPQUE(l->l_wchan))->sq_head;
  617                 while (*hp != l)
  618                         hp = &(*hp)->l_forw;
  619                 *hp = l->l_forw;
  620                 if (qp->sq_tailp == &l->l_forw)
  621                         qp->sq_tailp = hp;
  622                 l->l_wchan = 0;
  623         }
  624 }
  625 
  626 __inline void
  627 sa_awaken(struct lwp *l)
  628 {
  629 
  630         SCHED_ASSERT_LOCKED();
  631 
  632         if (l == l->l_savp->savp_lwp && l->l_flag & L_SA_YIELD)
  633                 l->l_flag &= ~L_SA_IDLE;
  634 }
  635 
  636 /*
  637  * Optimized-for-wakeup() version of setrunnable().
  638  */
  639 __inline void
  640 awaken(struct lwp *l)
  641 {
  642 
  643         SCHED_ASSERT_LOCKED();
  644 
  645         if (l->l_proc->p_sa)
  646                 sa_awaken(l);
  647 
  648         if (l->l_slptime > 1)
  649                 updatepri(l);
  650         l->l_slptime = 0;
  651         l->l_stat = LSRUN;
  652         l->l_proc->p_nrlwps++;
  653         /*
  654          * Since curpriority is a user priority, p->p_priority
  655          * is always better than curpriority on the last CPU on
  656          * which it ran.
  657          *
  658          * XXXSMP See affinity comment in resched_proc().
  659          */
  660         if (l->l_flag & L_INMEM) {
  661                 setrunqueue(l);
  662                 KASSERT(l->l_cpu != NULL);
  663                 need_resched(l->l_cpu);
  664         } else
  665                 sched_wakeup(&proc0);
  666 }
  667 
  668 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
  669 void
  670 sched_unlock_idle(void)
  671 {
  672 
  673         simple_unlock(&sched_lock);
  674 }
  675 
  676 void
  677 sched_lock_idle(void)
  678 {
  679 
  680         simple_lock(&sched_lock);
  681 }
  682 #endif /* MULTIPROCESSOR || LOCKDEBUG */
  683 
  684 /*
  685  * Make all processes sleeping on the specified identifier runnable.
  686  */
  687 
  688 void
  689 wakeup(const void *ident)
  690 {
  691         int s;
  692 
  693         SCHED_ASSERT_UNLOCKED();
  694 
  695         SCHED_LOCK(s);
  696         sched_wakeup(ident);
  697         SCHED_UNLOCK(s);
  698 }
  699 
  700 void
  701 sched_wakeup(const void *ident)
  702 {
  703         struct slpque *qp;
  704         struct lwp *l, **q;
  705 
  706         SCHED_ASSERT_LOCKED();
  707 
  708         qp = SLPQUE(ident);
  709  restart:
  710         for (q = &qp->sq_head; (l = *q) != NULL; ) {
  711 #ifdef DIAGNOSTIC
  712                 if (l->l_back || (l->l_stat != LSSLEEP &&
  713                     l->l_stat != LSSTOP && l->l_stat != LSSUSPENDED))
  714                         panic("wakeup");
  715 #endif
  716                 if (l->l_wchan == ident) {
  717                         l->l_wchan = 0;
  718                         *q = l->l_forw;
  719                         if (qp->sq_tailp == &l->l_forw)
  720                                 qp->sq_tailp = q;
  721                         if (l->l_stat == LSSLEEP) {
  722                                 awaken(l);
  723                                 goto restart;
  724                         }
  725                 } else
  726                         q = &l->l_forw;
  727         }
  728 }
  729 
  730 /*
  731  * Make the highest priority process first in line on the specified
  732  * identifier runnable.
  733  */
  734 void
  735 wakeup_one(const void *ident)
  736 {
  737         struct slpque *qp;
  738         struct lwp *l, **q;
  739         struct lwp *best_sleepp, **best_sleepq;
  740         struct lwp *best_stopp, **best_stopq;
  741         int s;
  742 
  743         best_sleepp = best_stopp = NULL;
  744         best_sleepq = best_stopq = NULL;
  745 
  746         SCHED_LOCK(s);
  747 
  748         qp = SLPQUE(ident);
  749 
  750         for (q = &qp->sq_head; (l = *q) != NULL; q = &l->l_forw) {
  751 #ifdef DIAGNOSTIC
  752                 if (l->l_back || (l->l_stat != LSSLEEP &&
  753                     l->l_stat != LSSTOP && l->l_stat != LSSUSPENDED))
  754                         panic("wakeup_one");
  755 #endif
  756                 if (l->l_wchan == ident) {
  757                         if (l->l_stat == LSSLEEP) {
  758                                 if (best_sleepp == NULL ||
  759                                     l->l_priority < best_sleepp->l_priority) {
  760                                         best_sleepp = l;
  761                                         best_sleepq = q;
  762                                 }
  763                         } else {
  764                                 if (best_stopp == NULL ||
  765                                     l->l_priority < best_stopp->l_priority) {
  766                                         best_stopp = l;
  767                                         best_stopq = q;
  768                                 }
  769                         }
  770                 }
  771         }
  772 
  773         /*
  774          * Consider any SSLEEP process higher than the highest priority SSTOP
  775          * process.
  776          */
  777         if (best_sleepp != NULL) {
  778                 l = best_sleepp;
  779                 q = best_sleepq;
  780         } else {
  781                 l = best_stopp;
  782                 q = best_stopq;
  783         }
  784 
  785         if (l != NULL) {
  786                 l->l_wchan = NULL;
  787                 *q = l->l_forw;
  788                 if (qp->sq_tailp == &l->l_forw)
  789                         qp->sq_tailp = q;
  790                 if (l->l_stat == LSSLEEP)
  791                         awaken(l);
  792         }
  793         SCHED_UNLOCK(s);
  794 }
  795 
  796 /*
  797  * General yield call.  Puts the current process back on its run queue and
  798  * performs a voluntary context switch.  Should only be called when the
  799  * current process explicitly requests it (eg sched_yield(2) in compat code).
  800  */
  801 void
  802 yield(void)
  803 {
  804         struct lwp *l = curlwp;
  805         int s;
  806 
  807         SCHED_LOCK(s);
  808         l->l_priority = l->l_usrpri;
  809         l->l_stat = LSRUN;
  810         setrunqueue(l);
  811         l->l_proc->p_stats->p_ru.ru_nvcsw++;
  812         mi_switch(l, NULL);
  813         SCHED_ASSERT_UNLOCKED();
  814         splx(s);
  815 }
  816 
  817 /*
  818  * General preemption call.  Puts the current process back on its run queue
  819  * and performs an involuntary context switch.  If a process is supplied,
  820  * we switch to that process.  Otherwise, we use the normal process selection
  821  * criteria.
  822  */
  823 
  824 void
  825 preempt(int more)
  826 {
  827         struct lwp *l = curlwp;
  828         int r, s;
  829 
  830         SCHED_LOCK(s);
  831         l->l_priority = l->l_usrpri;
  832         l->l_stat = LSRUN;
  833         setrunqueue(l);
  834         l->l_proc->p_stats->p_ru.ru_nivcsw++;
  835         r = mi_switch(l, NULL);
  836         SCHED_ASSERT_UNLOCKED();
  837         splx(s);
  838         if ((l->l_flag & L_SA) != 0 && r != 0 && more == 0)
  839                 sa_preempt(l);
  840 }
  841 
  842 /*
  843  * The machine independent parts of context switch.
  844  * Must be called at splsched() (no higher!) and with
  845  * the sched_lock held.
  846  * Switch to "new" if non-NULL, otherwise let cpu_switch choose
  847  * the next lwp.
  848  *
  849  * Returns 1 if another process was actually run.
  850  */
  851 int
  852 mi_switch(struct lwp *l, struct lwp *newl)
  853 {
  854         struct schedstate_percpu *spc;
  855         struct rlimit *rlim;
  856         long s, u;
  857         struct timeval tv;
  858         int hold_count;
  859         struct proc *p = l->l_proc;
  860         int retval;
  861 
  862         SCHED_ASSERT_LOCKED();
  863 
  864         /*
  865          * Release the kernel_lock, as we are about to yield the CPU.
  866          * The scheduler lock is still held until cpu_switch()
  867          * selects a new process and removes it from the run queue.
  868          */
  869         hold_count = KERNEL_LOCK_RELEASE_ALL();
  870 
  871         KDASSERT(l->l_cpu != NULL);
  872         KDASSERT(l->l_cpu == curcpu());
  873 
  874         spc = &l->l_cpu->ci_schedstate;
  875 
  876 #if defined(LOCKDEBUG) || defined(DIAGNOSTIC)
  877         spinlock_switchcheck();
  878 #endif
  879 #ifdef LOCKDEBUG
  880         simple_lock_switchcheck();
  881 #endif
  882 
  883         /*
  884          * Compute the amount of time during which the current
  885          * process was running.
  886          */
  887         microtime(&tv);
  888         u = p->p_rtime.tv_usec +
  889             (tv.tv_usec - spc->spc_runtime.tv_usec);
  890         s = p->p_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
  891         if (u < 0) {
  892                 u += 1000000;
  893                 s--;
  894         } else if (u >= 1000000) {
  895                 u -= 1000000;
  896                 s++;
  897         }
  898         p->p_rtime.tv_usec = u;
  899         p->p_rtime.tv_sec = s;
  900 
  901         /*
  902          * Check if the process exceeds its CPU resource allocation.
  903          * If over max, kill it.  In any case, if it has run for more
  904          * than 10 minutes, reduce priority to give others a chance.
  905          */
  906         rlim = &p->p_rlimit[RLIMIT_CPU];
  907         if (s >= rlim->rlim_cur) {
  908                 /*
  909                  * XXXSMP: we're inside the scheduler lock perimeter;
  910                  * use sched_psignal.
  911                  */
  912                 if (s >= rlim->rlim_max)
  913                         sched_psignal(p, SIGKILL);
  914                 else {
  915                         sched_psignal(p, SIGXCPU);
  916                         if (rlim->rlim_cur < rlim->rlim_max)
  917                                 rlim->rlim_cur += 5;
  918                 }
  919         }
  920         if (autonicetime && s > autonicetime && p->p_ucred->cr_uid &&
  921             p->p_nice == NZERO) {
  922                 p->p_nice = autoniceval + NZERO;
  923                 resetpriority(l);
  924         }
  925 
  926         /*
  927          * Process is about to yield the CPU; clear the appropriate
  928          * scheduling flags.
  929          */
  930         spc->spc_flags &= ~SPCF_SWITCHCLEAR;
  931 
  932 #ifdef KSTACK_CHECK_MAGIC
  933         kstack_check_magic(l);
  934 #endif
  935 
  936         /*
  937          * If we are using h/w performance counters, save context.
  938          */
  939 #if PERFCTRS
  940         if (PMC_ENABLED(p))
  941                 pmc_save_context(p);
  942 #endif
  943 
  944         /*
  945          * Switch to the new current process.  When we
  946          * run again, we'll return back here.
  947          */
  948         uvmexp.swtch++;
  949         if (newl == NULL) {
  950                 retval = cpu_switch(l, NULL);
  951         } else {
  952                 remrunqueue(newl);
  953                 cpu_switchto(l, newl);
  954                 retval = 0;
  955         }
  956 
  957         /*
  958          * If we are using h/w performance counters, restore context.
  959          */
  960 #if PERFCTRS
  961         if (PMC_ENABLED(p))
  962                 pmc_restore_context(p);
  963 #endif
  964 
  965         /*
  966          * Make sure that MD code released the scheduler lock before
  967          * resuming us.
  968          */
  969         SCHED_ASSERT_UNLOCKED();
  970 
  971         /*
  972          * We're running again; record our new start time.  We might
  973          * be running on a new CPU now, so don't use the cache'd
  974          * schedstate_percpu pointer.
  975          */
  976         KDASSERT(l->l_cpu != NULL);
  977         KDASSERT(l->l_cpu == curcpu());
  978         microtime(&l->l_cpu->ci_schedstate.spc_runtime);
  979 
  980         /*
  981          * Reacquire the kernel_lock now.  We do this after we've
  982          * released the scheduler lock to avoid deadlock, and before
  983          * we reacquire the interlock.
  984          */
  985         KERNEL_LOCK_ACQUIRE_COUNT(hold_count);
  986 
  987         return retval;
  988 }
  989 
  990 /*
  991  * Initialize the (doubly-linked) run queues
  992  * to be empty.
  993  */
  994 void
  995 rqinit()
  996 {
  997         int i;
  998 
  999         for (i = 0; i < RUNQUE_NQS; i++)
 1000                 sched_qs[i].ph_link = sched_qs[i].ph_rlink =
 1001                     (struct lwp *)&sched_qs[i];
 1002 }
 1003 
 1004 static __inline void
 1005 resched_proc(struct lwp *l, u_char pri)
 1006 {
 1007         struct cpu_info *ci;
 1008 
 1009         /*
 1010          * XXXSMP
 1011          * Since l->l_cpu persists across a context switch,
 1012          * this gives us *very weak* processor affinity, in
 1013          * that we notify the CPU on which the process last
 1014          * ran that it should try to switch.
 1015          *
 1016          * This does not guarantee that the process will run on
 1017          * that processor next, because another processor might
 1018          * grab it the next time it performs a context switch.
 1019          *
 1020          * This also does not handle the case where its last
 1021          * CPU is running a higher-priority process, but every
 1022          * other CPU is running a lower-priority process.  There
 1023          * are ways to handle this situation, but they're not
 1024          * currently very pretty, and we also need to weigh the
 1025          * cost of moving a process from one CPU to another.
 1026          *
 1027          * XXXSMP
 1028          * There is also the issue of locking the other CPU's
 1029          * sched state, which we currently do not do.
 1030          */
 1031         ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
 1032         if (pri < ci->ci_schedstate.spc_curpriority)
 1033                 need_resched(ci);
 1034 }
 1035 
 1036 /*
 1037  * Change process state to be runnable,
 1038  * placing it on the run queue if it is in memory,
 1039  * and awakening the swapper if it isn't in memory.
 1040  */
 1041 void
 1042 setrunnable(struct lwp *l)
 1043 {
 1044         struct proc *p = l->l_proc;
 1045 
 1046         SCHED_ASSERT_LOCKED();
 1047 
 1048         switch (l->l_stat) {
 1049         case 0:
 1050         case LSRUN:
 1051         case LSONPROC:
 1052         case LSZOMB:
 1053         case LSDEAD:
 1054         default:
 1055                 panic("setrunnable: lwp %p state was %d", l, l->l_stat);
 1056         case LSSTOP:
 1057                 /*
 1058                  * If we're being traced (possibly because someone attached us
 1059                  * while we were stopped), check for a signal from the debugger.
 1060                  */
 1061                 if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0) {
 1062                         sigaddset(&p->p_sigctx.ps_siglist, p->p_xstat);
 1063                         CHECKSIGS(p);
 1064                 }
 1065         case LSSLEEP:
 1066                 unsleep(l);             /* e.g. when sending signals */
 1067                 break;
 1068 
 1069         case LSIDL:
 1070                 break;
 1071         case LSSUSPENDED:
 1072                 break;
 1073         }
 1074 
 1075         if (l->l_proc->p_sa)
 1076                 sa_awaken(l);
 1077 
 1078         l->l_stat = LSRUN;
 1079         p->p_nrlwps++;
 1080 
 1081         if (l->l_flag & L_INMEM)
 1082                 setrunqueue(l);
 1083 
 1084         if (l->l_slptime > 1)
 1085                 updatepri(l);
 1086         l->l_slptime = 0;
 1087         if ((l->l_flag & L_INMEM) == 0)
 1088                 sched_wakeup((caddr_t)&proc0);
 1089         else
 1090                 resched_proc(l, l->l_priority);
 1091 }
 1092 
 1093 /*
 1094  * Compute the priority of a process when running in user mode.
 1095  * Arrange to reschedule if the resulting priority is better
 1096  * than that of the current process.
 1097  */
 1098 void
 1099 resetpriority(struct lwp *l)
 1100 {
 1101         unsigned int newpriority;
 1102         struct proc *p = l->l_proc;
 1103 
 1104         SCHED_ASSERT_LOCKED();
 1105 
 1106         newpriority = PUSER + p->p_estcpu +
 1107                         NICE_WEIGHT * (p->p_nice - NZERO);
 1108         newpriority = min(newpriority, MAXPRI);
 1109         l->l_usrpri = newpriority;
 1110         resched_proc(l, l->l_usrpri);
 1111 }
 1112 
 1113 /*
 1114  * Recompute priority for all LWPs in a process.
 1115  */
 1116 void
 1117 resetprocpriority(struct proc *p)
 1118 {
 1119         struct lwp *l;
 1120 
 1121         LIST_FOREACH(l, &p->p_lwps, l_sibling)
 1122             resetpriority(l);
 1123 }
 1124 
 1125 /*
 1126  * We adjust the priority of the current process.  The priority of a process
 1127  * gets worse as it accumulates CPU time.  The CPU usage estimator (p_estcpu)
 1128  * is increased here.  The formula for computing priorities (in kern_synch.c)
 1129  * will compute a different value each time p_estcpu increases. This can
 1130  * cause a switch, but unless the priority crosses a PPQ boundary the actual
 1131  * queue will not change.  The CPU usage estimator ramps up quite quickly
 1132  * when the process is running (linearly), and decays away exponentially, at
 1133  * a rate which is proportionally slower when the system is busy.  The basic
 1134  * principle is that the system will 90% forget that the process used a lot
 1135  * of CPU time in 5 * loadav seconds.  This causes the system to favor
 1136  * processes which haven't run much recently, and to round-robin among other
 1137  * processes.
 1138  */
 1139 
 1140 void
 1141 schedclock(struct lwp *l)
 1142 {
 1143         struct proc *p = l->l_proc;
 1144         int s;
 1145 
 1146         p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
 1147         SCHED_LOCK(s);
 1148         resetpriority(l);
 1149         SCHED_UNLOCK(s);
 1150 
 1151         if (l->l_priority >= PUSER)
 1152                 l->l_priority = l->l_usrpri;
 1153 }
 1154 
 1155 void
 1156 suspendsched()
 1157 {
 1158         struct lwp *l;
 1159         int s;
 1160 
 1161         /*
 1162          * Convert all non-P_SYSTEM LSSLEEP or LSRUN processes to
 1163          * LSSUSPENDED.
 1164          */
 1165         proclist_lock_read();
 1166         SCHED_LOCK(s);
 1167         LIST_FOREACH(l, &alllwp, l_list) {
 1168                 if ((l->l_proc->p_flag & P_SYSTEM) != 0)
 1169                         continue;
 1170 
 1171                 switch (l->l_stat) {
 1172                 case LSRUN:
 1173                         l->l_proc->p_nrlwps--;
 1174                         if ((l->l_flag & L_INMEM) != 0)
 1175                                 remrunqueue(l);
 1176                         /* FALLTHROUGH */
 1177                 case LSSLEEP:
 1178                         l->l_stat = LSSUSPENDED;
 1179                         break;
 1180                 case LSONPROC:
 1181                         /*
 1182                          * XXX SMP: we need to deal with processes on
 1183                          * others CPU !
 1184                          */
 1185                         break;
 1186                 default:
 1187                         break;
 1188                 }
 1189         }
 1190         SCHED_UNLOCK(s);
 1191         proclist_unlock_read();
 1192 }
 1193 
 1194 /*
 1195  * Low-level routines to access the run queue.  Optimised assembler
 1196  * routines can override these.
 1197  */
 1198 
 1199 #ifndef __HAVE_MD_RUNQUEUE
 1200 
 1201 /*
 1202  * On some architectures, it's faster to use a MSB ordering for the priorites
 1203  * than the traditional LSB ordering.
 1204  */
 1205 #ifdef __HAVE_BIGENDIAN_BITOPS
 1206 #define RQMASK(n) (0x80000000 >> (n))
 1207 #else
 1208 #define RQMASK(n) (0x00000001 << (n))
 1209 #endif
 1210 
 1211 /*
 1212  * The primitives that manipulate the run queues.  whichqs tells which
 1213  * of the 32 queues qs have processes in them.  Setrunqueue puts processes
 1214  * into queues, remrunqueue removes them from queues.  The running process is
 1215  * on no queue, other processes are on a queue related to p->p_priority,
 1216  * divided by 4 actually to shrink the 0-127 range of priorities into the 32
 1217  * available queues.
 1218  */
 1219 
 1220 #ifdef RQDEBUG
 1221 static void
 1222 checkrunqueue(int whichq, struct lwp *l)
 1223 {
 1224         const struct prochd * const rq = &sched_qs[whichq];
 1225         struct lwp *l2;
 1226         int found = 0;
 1227         int die = 0;
 1228         int empty = 1;
 1229         for (l2 = rq->ph_link; l2 != (void*) rq; l2 = l2->l_forw) {
 1230                 if (l2->l_stat != LSRUN) {
 1231                         printf("checkrunqueue[%d]: lwp %p state (%d) "
 1232                             " != LSRUN\n", whichq, l2, l2->l_stat);
 1233                 }
 1234                 if (l2->l_back->l_forw != l2) {
 1235                         printf("checkrunqueue[%d]: lwp %p back-qptr (%p) "
 1236                             "corrupt %p\n", whichq, l2, l2->l_back,
 1237                             l2->l_back->l_forw);
 1238                         die = 1;
 1239                 }
 1240                 if (l2->l_forw->l_back != l2) {
 1241                         printf("checkrunqueue[%d]: lwp %p forw-qptr (%p) "
 1242                             "corrupt %p\n", whichq, l2, l2->l_forw,
 1243                             l2->l_forw->l_back);
 1244                         die = 1;
 1245                 }
 1246                 if (l2 == l)
 1247                         found = 1;
 1248                 empty = 0;
 1249         }
 1250         if (empty && (sched_whichqs & RQMASK(whichq)) != 0) {
 1251                 printf("checkrunqueue[%d]: bit set for empty run-queue %p\n",
 1252                     whichq, rq);
 1253                 die = 1;
 1254         } else if (!empty && (sched_whichqs & RQMASK(whichq)) == 0) {
 1255                 printf("checkrunqueue[%d]: bit clear for non-empty "
 1256                     "run-queue %p\n", whichq, rq);
 1257                 die = 1;
 1258         }
 1259         if (l != NULL && (sched_whichqs & RQMASK(whichq)) == 0) {
 1260                 printf("checkrunqueue[%d]: bit clear for active lwp %p\n",
 1261                     whichq, l);
 1262                 die = 1;
 1263         }
 1264         if (l != NULL && empty) {
 1265                 printf("checkrunqueue[%d]: empty run-queue %p with "
 1266                     "active lwp %p\n", whichq, rq, l);
 1267                 die = 1;
 1268         }
 1269         if (l != NULL && !found) {
 1270                 printf("checkrunqueue[%d]: lwp %p not in runqueue %p!",
 1271                     whichq, l, rq);
 1272                 die = 1;
 1273         }
 1274         if (die)
 1275                 panic("checkrunqueue: inconsistency found");
 1276 }
 1277 #endif /* RQDEBUG */
 1278 
 1279 void
 1280 setrunqueue(struct lwp *l)
 1281 {
 1282         struct prochd *rq;
 1283         struct lwp *prev;
 1284         const int whichq = l->l_priority / 4;
 1285 
 1286 #ifdef RQDEBUG
 1287         checkrunqueue(whichq, NULL);
 1288 #endif
 1289 #ifdef DIAGNOSTIC
 1290         if (l->l_back != NULL || l->l_wchan != NULL || l->l_stat != LSRUN)
 1291                 panic("setrunqueue");
 1292 #endif
 1293         sched_whichqs |= RQMASK(whichq);
 1294         rq = &sched_qs[whichq];
 1295         prev = rq->ph_rlink;
 1296         l->l_forw = (struct lwp *)rq;
 1297         rq->ph_rlink = l;
 1298         prev->l_forw = l;
 1299         l->l_back = prev;
 1300 #ifdef RQDEBUG
 1301         checkrunqueue(whichq, l);
 1302 #endif
 1303 }
 1304 
 1305 void
 1306 remrunqueue(struct lwp *l)
 1307 {
 1308         struct lwp *prev, *next;
 1309         const int whichq = l->l_priority / 4;
 1310 #ifdef RQDEBUG
 1311         checkrunqueue(whichq, l);
 1312 #endif
 1313 #ifdef DIAGNOSTIC
 1314         if (((sched_whichqs & RQMASK(whichq)) == 0))
 1315                 panic("remrunqueue: bit %d not set", whichq);
 1316 #endif
 1317         prev = l->l_back;
 1318         l->l_back = NULL;
 1319         next = l->l_forw;
 1320         prev->l_forw = next;
 1321         next->l_back = prev;
 1322         if (prev == next)
 1323                 sched_whichqs &= ~RQMASK(whichq);
 1324 #ifdef RQDEBUG
 1325         checkrunqueue(whichq, NULL);
 1326 #endif
 1327 }
 1328 
 1329 #undef RQMASK
 1330 #endif /* !defined(__HAVE_MD_RUNQUEUE) */

Cache object: 10204a48d7ae3b9c236ce8fcfff3e891


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.