kern_synch.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*-
    2  * Copyright (c) 1982, 1986, 1990, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. All advertising materials mentioning features or use of this software
   19  *    must display the following acknowledgement:
   20  *      This product includes software developed by the University of
   21  *      California, Berkeley and its contributors.
   22  * 4. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  *      @(#)kern_synch.c        8.9 (Berkeley) 5/19/95
   39  * $FreeBSD$
   40  */
   41 
   42 #include "opt_ktrace.h"
   43 
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/proc.h>
   47 #include <sys/kernel.h>
   48 #include <sys/signalvar.h>
   49 #include <sys/resourcevar.h>
   50 #include <sys/vmmeter.h>
   51 #include <sys/sysctl.h>
   52 #ifdef KTRACE
   53 #include <sys/uio.h>
   54 #include <sys/ktrace.h>
   55 #endif
   56 
   57 #include <machine/cpu.h>
   58 #include <machine/ipl.h>
   59 #include <machine/smp.h>
   60 
   61 static void sched_setup __P((void *dummy));
   62 SYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL)
   63 
   64 u_char  curpriority;
   65 int     hogticks;
   66 int     lbolt;
   67 int     sched_quantum;          /* Roundrobin scheduling quantum in ticks. */
   68 
   69 static struct callout loadav_callout;
   70 
   71 struct loadavg averunnable =
   72         { {0, 0, 0}, FSCALE };  /* load average, of runnable procs */
   73 /*
   74  * Constants for averages over 1, 5, and 15 minutes
   75  * when sampling at 5 second intervals.
   76  */
   77 static fixpt_t cexp[3] = {
   78         0.9200444146293232 * FSCALE,    /* exp(-1/12) */
   79         0.9834714538216174 * FSCALE,    /* exp(-1/60) */
   80         0.9944598480048967 * FSCALE,    /* exp(-1/180) */
   81 };
   82 
   83 static int      curpriority_cmp __P((struct proc *p));
   84 static void     endtsleep __P((void *));
   85 static void     loadav __P((void *arg));
   86 static void     maybe_resched __P((struct proc *chk));
   87 static void     roundrobin __P((void *arg));
   88 static void     schedcpu __P((void *arg));
   89 static void     updatepri __P((struct proc *p));
   90 
   91 static int
   92 sysctl_kern_quantum(SYSCTL_HANDLER_ARGS)
   93 {
   94         int error, new_val;
   95 
   96         new_val = sched_quantum * tick;
   97         error = sysctl_handle_int(oidp, &new_val, 0, req);
   98         if (error != 0 || req->newptr == NULL)
   99                 return (error);
  100         if (new_val < tick)
  101                 return (EINVAL);
  102         sched_quantum = new_val / tick;
  103         hogticks = 2 * sched_quantum;
  104         return (0);
  105 }
  106 
  107 SYSCTL_PROC(_kern, OID_AUTO, quantum, CTLTYPE_INT|CTLFLAG_RW,
  108         0, sizeof sched_quantum, sysctl_kern_quantum, "I", "");
  109 
  110 /*-
  111  * Compare priorities.  Return:
  112  *     <0: priority of p < current priority
  113  *      0: priority of p == current priority
  114  *     >0: priority of p > current priority
  115  * The priorities are the normal priorities or the normal realtime priorities
  116  * if p is on the same scheduler as curproc.  Otherwise the process on the
  117  * more realtimeish scheduler has lowest priority.  As usual, a higher
  118  * priority really means a lower priority.
  119  */
  120 static int
  121 curpriority_cmp(p)
  122         struct proc *p;
  123 {
  124         int c_class, p_class;
  125 
  126         c_class = RTP_PRIO_BASE(curproc->p_rtprio.type);
  127         p_class = RTP_PRIO_BASE(p->p_rtprio.type);
  128         if (p_class != c_class)
  129                 return (p_class - c_class);
  130         if (p_class == RTP_PRIO_NORMAL)
  131                 return (((int)p->p_priority - (int)curpriority) / PPQ);
  132         return ((int)p->p_rtprio.prio - (int)curproc->p_rtprio.prio);
  133 }
  134 
  135 /*
  136  * Arrange to reschedule if necessary, taking the priorities and
  137  * schedulers into account.
  138  */
  139 static void
  140 maybe_resched(chk)
  141         struct proc *chk;
  142 {
  143         struct proc *p = curproc; /* XXX */
  144 
  145         /*
  146          * XXX idle scheduler still broken because proccess stays on idle
  147          * scheduler during waits (such as when getting FS locks).  If a
  148          * standard process becomes runaway cpu-bound, the system can lockup
  149          * due to idle-scheduler processes in wakeup never getting any cpu.
  150          */
  151         if (p == NULL) {
  152 #if 0
  153                 need_resched();
  154 #endif
  155         } else if (chk == p) {
  156                 /* We may need to yield if our priority has been raised. */
  157                 if (curpriority_cmp(chk) > 0)
  158                         need_resched();
  159         } else if (curpriority_cmp(chk) < 0)
  160                 need_resched();
  161 }
  162 
  163 int 
  164 roundrobin_interval(void)
  165 {
  166         return (sched_quantum);
  167 }
  168 
  169 /*
  170  * Force switch among equal priority processes every 100ms.
  171  */
  172 /* ARGSUSED */
  173 static void
  174 roundrobin(arg)
  175         void *arg;
  176 {
  177 #ifndef SMP
  178         struct proc *p = curproc; /* XXX */
  179 #endif
  180  
  181 #ifdef SMP
  182         need_resched();
  183         forward_roundrobin();
  184 #else 
  185         if (p == 0 || RTP_PRIO_NEED_RR(p->p_rtprio.type))
  186                 need_resched();
  187 #endif
  188 
  189         timeout(roundrobin, NULL, sched_quantum);
  190 }
  191 
  192 /*
  193  * Constants for digital decay and forget:
  194  *      90% of (p_estcpu) usage in 5 * loadav time
  195  *      95% of (p_pctcpu) usage in 60 seconds (load insensitive)
  196  *          Note that, as ps(1) mentions, this can let percentages
  197  *          total over 100% (I've seen 137.9% for 3 processes).
  198  *
  199  * Note that schedclock() updates p_estcpu and p_cpticks asynchronously.
  200  *
  201  * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
  202  * That is, the system wants to compute a value of decay such
  203  * that the following for loop:
  204  *      for (i = 0; i < (5 * loadavg); i++)
  205  *              p_estcpu *= decay;
  206  * will compute
  207  *      p_estcpu *= 0.1;
  208  * for all values of loadavg:
  209  *
  210  * Mathematically this loop can be expressed by saying:
  211  *      decay ** (5 * loadavg) ~= .1
  212  *
  213  * The system computes decay as:
  214  *      decay = (2 * loadavg) / (2 * loadavg + 1)
  215  *
  216  * We wish to prove that the system's computation of decay
  217  * will always fulfill the equation:
  218  *      decay ** (5 * loadavg) ~= .1
  219  *
  220  * If we compute b as:
  221  *      b = 2 * loadavg
  222  * then
  223  *      decay = b / (b + 1)
  224  *
  225  * We now need to prove two things:
  226  *      1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
  227  *      2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
  228  *
  229  * Facts:
  230  *         For x close to zero, exp(x) =~ 1 + x, since
  231  *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
  232  *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
  233  *         For x close to zero, ln(1+x) =~ x, since
  234  *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
  235  *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
  236  *         ln(.1) =~ -2.30
  237  *
  238  * Proof of (1):
  239  *    Solve (factor)**(power) =~ .1 given power (5*loadav):
  240  *      solving for factor,
  241  *      ln(factor) =~ (-2.30/5*loadav), or
  242  *      factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
  243  *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
  244  *
  245  * Proof of (2):
  246  *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
  247  *      solving for power,
  248  *      power*ln(b/(b+1)) =~ -2.30, or
  249  *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
  250  *
  251  * Actual power values for the implemented algorithm are as follows:
  252  *      loadav: 1       2       3       4
  253  *      power:  5.68    10.32   14.94   19.55
  254  */
  255 
  256 /* calculations for digital decay to forget 90% of usage in 5*loadav sec */
  257 #define loadfactor(loadav)      (2 * (loadav))
  258 #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))
  259 
  260 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
  261 static fixpt_t  ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
  262 SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");
  263 
  264 /* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */
  265 static int      fscale __unused = FSCALE;
  266 SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, "");
  267 
  268 /*
  269  * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
  270  * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
  271  * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
  272  *
  273  * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
  274  *      1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
  275  *
  276  * If you don't want to bother with the faster/more-accurate formula, you
  277  * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
  278  * (more general) method of calculating the %age of CPU used by a process.
  279  */
  280 #define CCPU_SHIFT      11
  281 
  282 /*
  283  * Recompute process priorities, every hz ticks.
  284  */
  285 /* ARGSUSED */
  286 static void
  287 schedcpu(arg)
  288         void *arg;
  289 {
  290         register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
  291         register struct proc *p;
  292         register int realstathz, s;
  293 
  294         realstathz = stathz ? stathz : hz;
  295         LIST_FOREACH(p, &allproc, p_list) {
  296                 /*
  297                  * Increment time in/out of memory and sleep time
  298                  * (if sleeping).  We ignore overflow; with 16-bit int's
  299                  * (remember them?) overflow takes 45 days.
  300                  */
  301                 p->p_swtime++;
  302                 if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
  303                         p->p_slptime++;
  304                 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
  305                 /*
  306                  * If the process has slept the entire second,
  307                  * stop recalculating its priority until it wakes up.
  308                  */
  309                 if (p->p_slptime > 1)
  310                         continue;
  311                 s = splhigh();  /* prevent state changes and protect run queue */
  312                 /*
  313                  * p_pctcpu is only for ps.
  314                  */
  315 #if     (FSHIFT >= CCPU_SHIFT)
  316                 p->p_pctcpu += (realstathz == 100)?
  317                         ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
  318                         100 * (((fixpt_t) p->p_cpticks)
  319                                 << (FSHIFT - CCPU_SHIFT)) / realstathz;
  320 #else
  321                 p->p_pctcpu += ((FSCALE - ccpu) *
  322                         (p->p_cpticks * FSCALE / realstathz)) >> FSHIFT;
  323 #endif
  324                 p->p_cpticks = 0;
  325                 p->p_estcpu = decay_cpu(loadfac, p->p_estcpu);
  326                 resetpriority(p);
  327                 if (p->p_priority >= PUSER) {
  328                         if ((p != curproc) &&
  329 #ifdef SMP
  330                             p->p_oncpu == 0xff &&       /* idle */
  331 #endif
  332                             p->p_stat == SRUN &&
  333                             (p->p_flag & P_INMEM) &&
  334                             (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
  335                                 remrunqueue(p);
  336                                 p->p_priority = p->p_usrpri;
  337                                 setrunqueue(p);
  338                         } else
  339                                 p->p_priority = p->p_usrpri;
  340                 }
  341                 splx(s);
  342         }
  343         wakeup((caddr_t)&lbolt);
  344         timeout(schedcpu, (void *)0, hz);
  345 }
  346 
  347 /*
  348  * Recalculate the priority of a process after it has slept for a while.
  349  * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
  350  * least six times the loadfactor will decay p_estcpu to zero.
  351  */
  352 static void
  353 updatepri(p)
  354         register struct proc *p;
  355 {
  356         register unsigned int newcpu = p->p_estcpu;
  357         register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
  358 
  359         if (p->p_slptime > 5 * loadfac)
  360                 p->p_estcpu = 0;
  361         else {
  362                 p->p_slptime--; /* the first time was done in schedcpu */
  363                 while (newcpu && --p->p_slptime)
  364                         newcpu = decay_cpu(loadfac, newcpu);
  365                 p->p_estcpu = newcpu;
  366         }
  367         resetpriority(p);
  368 }
  369 
  370 /*
  371  * We're only looking at 7 bits of the address; everything is
  372  * aligned to 4, lots of things are aligned to greater powers
  373  * of 2.  Shift right by 8, i.e. drop the bottom 256 worth.
  374  */
  375 #define TABLESIZE       128
  376 static TAILQ_HEAD(slpquehead, proc) slpque[TABLESIZE];
  377 #define LOOKUP(x)       (((intptr_t)(x) >> 8) & (TABLESIZE - 1))
  378 
  379 /*
  380  * During autoconfiguration or after a panic, a sleep will simply
  381  * lower the priority briefly to allow interrupts, then return.
  382  * The priority to be used (safepri) is machine-dependent, thus this
  383  * value is initialized and maintained in the machine-dependent layers.
  384  * This priority will typically be 0, or the lowest priority
  385  * that is safe for use on the interrupt stack; it can be made
  386  * higher to block network software interrupts after panics.
  387  */
  388 int safepri;
  389 
  390 void
  391 sleepinit(void)
  392 {
  393         int i;
  394 
  395         sched_quantum = hz/10;
  396         hogticks = 2 * sched_quantum;
  397         for (i = 0; i < TABLESIZE; i++)
  398                 TAILQ_INIT(&slpque[i]);
  399 }
  400 
  401 /*
  402  * General sleep call.  Suspends the current process until a wakeup is
  403  * performed on the specified identifier.  The process will then be made
  404  * runnable with the specified priority.  Sleeps at most timo/hz seconds
  405  * (0 means no timeout).  If pri includes PCATCH flag, signals are checked
  406  * before and after sleeping, else signals are not checked.  Returns 0 if
  407  * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
  408  * signal needs to be delivered, ERESTART is returned if the current system
  409  * call should be restarted if possible, and EINTR is returned if the system
  410  * call should be interrupted by the signal (return EINTR).
  411  */
  412 int
  413 tsleep(ident, priority, wmesg, timo)
  414         void *ident;
  415         int priority, timo;
  416         const char *wmesg;
  417 {
  418         struct proc *p = curproc;
  419         int s, sig, catch = priority & PCATCH;
  420         struct callout_handle thandle;
  421 
  422 #ifdef KTRACE
  423         if (p && KTRPOINT(p, KTR_CSW))
  424                 ktrcsw(p->p_tracep, 1, 0);
  425 #endif
  426         s = splhigh();
  427         if (cold || panicstr) {
  428                 /*
  429                  * After a panic, or during autoconfiguration,
  430                  * just give interrupts a chance, then just return;
  431                  * don't run any other procs or panic below,
  432                  * in case this is the idle process and already asleep.
  433                  */
  434                 splx(safepri);
  435                 splx(s);
  436                 return (0);
  437         }
  438         KASSERT(p != NULL, ("tsleep1"));
  439         KASSERT(ident != NULL && p->p_stat == SRUN, ("tsleep"));
  440         /*
  441          * Process may be sitting on a slpque if asleep() was called, remove
  442          * it before re-adding.
  443          */
  444         if (p->p_wchan != NULL)
  445                 unsleep(p);
  446 
  447         p->p_wchan = ident;
  448         p->p_wmesg = wmesg;
  449         p->p_slptime = 0;
  450         p->p_priority = priority & PRIMASK;
  451         TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq);
  452         if (timo)
  453                 thandle = timeout(endtsleep, (void *)p, timo);
  454         /*
  455          * We put ourselves on the sleep queue and start our timeout
  456          * before calling CURSIG, as we could stop there, and a wakeup
  457          * or a SIGCONT (or both) could occur while we were stopped.
  458          * A SIGCONT would cause us to be marked as SSLEEP
  459          * without resuming us, thus we must be ready for sleep
  460          * when CURSIG is called.  If the wakeup happens while we're
  461          * stopped, p->p_wchan will be 0 upon return from CURSIG.
  462          */
  463         if (catch) {
  464                 p->p_flag |= P_SINTR;
  465                 if ((sig = CURSIG(p))) {
  466                         if (p->p_wchan)
  467                                 unsleep(p);
  468                         p->p_stat = SRUN;
  469                         goto resume;
  470                 }
  471                 if (p->p_wchan == 0) {
  472                         catch = 0;
  473                         goto resume;
  474                 }
  475         } else
  476                 sig = 0;
  477         p->p_stat = SSLEEP;
  478         p->p_stats->p_ru.ru_nvcsw++;
  479         mi_switch();
  480 resume:
  481         curpriority = p->p_usrpri;
  482         splx(s);
  483         p->p_flag &= ~P_SINTR;
  484         if (p->p_flag & P_TIMEOUT) {
  485                 p->p_flag &= ~P_TIMEOUT;
  486                 if (sig == 0) {
  487 #ifdef KTRACE
  488                         if (KTRPOINT(p, KTR_CSW))
  489                                 ktrcsw(p->p_tracep, 0, 0);
  490 #endif
  491                         return (EWOULDBLOCK);
  492                 }
  493         } else if (timo)
  494                 untimeout(endtsleep, (void *)p, thandle);
  495         if (catch && (sig != 0 || (sig = CURSIG(p)))) {
  496 #ifdef KTRACE
  497                 if (KTRPOINT(p, KTR_CSW))
  498                         ktrcsw(p->p_tracep, 0, 0);
  499 #endif
  500                 if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
  501                         return (EINTR);
  502                 return (ERESTART);
  503         }
  504 #ifdef KTRACE
  505         if (KTRPOINT(p, KTR_CSW))
  506                 ktrcsw(p->p_tracep, 0, 0);
  507 #endif
  508         return (0);
  509 }
  510 
  511 /*
  512  * asleep() - async sleep call.  Place process on wait queue and return 
  513  * immediately without blocking.  The process stays runnable until await() 
  514  * is called.  If ident is NULL, remove process from wait queue if it is still
  515  * on one.
  516  *
  517  * Only the most recent sleep condition is effective when making successive
  518  * calls to asleep() or when calling tsleep().
  519  *
  520  * The timeout, if any, is not initiated until await() is called.  The sleep
  521  * priority, signal, and timeout is specified in the asleep() call but may be
  522  * overriden in the await() call.
  523  *
  524  * <<<<<<<< EXPERIMENTAL, UNTESTED >>>>>>>>>>
  525  */
  526 
  527 int
  528 asleep(void *ident, int priority, const char *wmesg, int timo)
  529 {
  530         struct proc *p = curproc;
  531         int s;
  532 
  533         /*
  534          * splhigh() while manipulating sleep structures and slpque.
  535          *
  536          * Remove preexisting wait condition (if any) and place process
  537          * on appropriate slpque, but do not put process to sleep.
  538          */
  539 
  540         s = splhigh();
  541 
  542         if (p->p_wchan != NULL)
  543                 unsleep(p);
  544 
  545         if (ident) {
  546                 p->p_wchan = ident;
  547                 p->p_wmesg = wmesg;
  548                 p->p_slptime = 0;
  549                 p->p_asleep.as_priority = priority;
  550                 p->p_asleep.as_timo = timo;
  551                 TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq);
  552         }
  553 
  554         splx(s);
  555 
  556         return(0);
  557 }
  558 
  559 /*
  560  * await() - wait for async condition to occur.   The process blocks until
  561  * wakeup() is called on the most recent asleep() address.  If wakeup is called
  562  * priority to await(), await() winds up being a NOP.
  563  *
  564  * If await() is called more then once (without an intervening asleep() call),
  565  * await() is still effectively a NOP but it calls mi_switch() to give other
  566  * processes some cpu before returning.  The process is left runnable.
  567  *
  568  * <<<<<<<< EXPERIMENTAL, UNTESTED >>>>>>>>>>
  569  */
  570 
  571 int
  572 await(int priority, int timo)
  573 {
  574         struct proc *p = curproc;
  575         int s;
  576 
  577         s = splhigh();
  578 
  579         if (p->p_wchan != NULL) {
  580                 struct callout_handle thandle;
  581                 int sig;
  582                 int catch;
  583 
  584                 /*
  585                  * The call to await() can override defaults specified in
  586                  * the original asleep().
  587                  */
  588                 if (priority < 0)
  589                         priority = p->p_asleep.as_priority;
  590                 if (timo < 0)
  591                         timo = p->p_asleep.as_timo;
  592 
  593                 /*
  594                  * Install timeout
  595                  */
  596 
  597                 if (timo)
  598                         thandle = timeout(endtsleep, (void *)p, timo);
  599 
  600                 sig = 0;
  601                 catch = priority & PCATCH;
  602 
  603                 if (catch) {
  604                         p->p_flag |= P_SINTR;
  605                         if ((sig = CURSIG(p))) {
  606                                 if (p->p_wchan)
  607                                         unsleep(p);
  608                                 p->p_stat = SRUN;
  609                                 goto resume;
  610                         }
  611                         if (p->p_wchan == NULL) {
  612                                 catch = 0;
  613                                 goto resume;
  614                         }
  615                 }
  616                 p->p_stat = SSLEEP;
  617                 p->p_stats->p_ru.ru_nvcsw++;
  618                 mi_switch();
  619 resume:
  620                 curpriority = p->p_usrpri;
  621 
  622                 splx(s);
  623                 p->p_flag &= ~P_SINTR;
  624                 if (p->p_flag & P_TIMEOUT) {
  625                         p->p_flag &= ~P_TIMEOUT;
  626                         if (sig == 0) {
  627 #ifdef KTRACE
  628                                 if (KTRPOINT(p, KTR_CSW))
  629                                         ktrcsw(p->p_tracep, 0, 0);
  630 #endif
  631                                 return (EWOULDBLOCK);
  632                         }
  633                 } else if (timo)
  634                         untimeout(endtsleep, (void *)p, thandle);
  635                 if (catch && (sig != 0 || (sig = CURSIG(p)))) {
  636 #ifdef KTRACE
  637                         if (KTRPOINT(p, KTR_CSW))
  638                                 ktrcsw(p->p_tracep, 0, 0);
  639 #endif
  640                         if (SIGISMEMBER(p->p_sigacts->ps_sigintr, sig))
  641                                 return (EINTR);
  642                         return (ERESTART);
  643                 }
  644 #ifdef KTRACE
  645                 if (KTRPOINT(p, KTR_CSW))
  646                         ktrcsw(p->p_tracep, 0, 0);
  647 #endif
  648         } else {
  649                 /*
  650                  * If as_priority is 0, await() has been called without an 
  651                  * intervening asleep().  We are still effectively a NOP, 
  652                  * but we call mi_switch() for safety.
  653                  */
  654 
  655                 if (p->p_asleep.as_priority == 0) {
  656                         p->p_stats->p_ru.ru_nvcsw++;
  657                         mi_switch();
  658                 }
  659                 splx(s);
  660         }
  661 
  662         /*
  663          * clear p_asleep.as_priority as an indication that await() has been
  664          * called.  If await() is called again without an intervening asleep(),
  665          * await() is still effectively a NOP but the above mi_switch() code
  666          * is triggered as a safety.
  667          */
  668         p->p_asleep.as_priority = 0;
  669 
  670         return (0);
  671 }
  672 
  673 /*
  674  * Implement timeout for tsleep or asleep()/await()
  675  *
  676  * If process hasn't been awakened (wchan non-zero),
  677  * set timeout flag and undo the sleep.  If proc
  678  * is stopped, just unsleep so it will remain stopped.
  679  */
  680 static void
  681 endtsleep(arg)
  682         void *arg;
  683 {
  684         register struct proc *p;
  685         int s;
  686 
  687         p = (struct proc *)arg;
  688         s = splhigh();
  689         if (p->p_wchan) {
  690                 if (p->p_stat == SSLEEP)
  691                         setrunnable(p);
  692                 else
  693                         unsleep(p);
  694                 p->p_flag |= P_TIMEOUT;
  695         }
  696         splx(s);
  697 }
  698 
  699 /*
  700  * Remove a process from its wait queue
  701  */
  702 void
  703 unsleep(p)
  704         register struct proc *p;
  705 {
  706         int s;
  707 
  708         s = splhigh();
  709         if (p->p_wchan) {
  710                 TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_procq);
  711                 p->p_wchan = 0;
  712         }
  713         splx(s);
  714 }
  715 
  716 /*
  717  * Make all processes sleeping on the specified identifier runnable.
  718  */
  719 void
  720 wakeup(ident)
  721         register void *ident;
  722 {
  723         register struct slpquehead *qp;
  724         register struct proc *p;
  725         struct proc *np;
  726         int s;
  727 
  728         s = splhigh();
  729         qp = &slpque[LOOKUP(ident)];
  730 restart:
  731         for (p = TAILQ_FIRST(qp); p != NULL; p = np) {
  732                 np = TAILQ_NEXT(p, p_procq);
  733                 if (p->p_wchan == ident) {
  734                         TAILQ_REMOVE(qp, p, p_procq);
  735                         p->p_wchan = 0;
  736                         if (p->p_stat == SSLEEP) {
  737                                 /* OPTIMIZED EXPANSION OF setrunnable(p); */
  738                                 if (p->p_slptime > 1)
  739                                         updatepri(p);
  740                                 p->p_slptime = 0;
  741                                 p->p_stat = SRUN;
  742                                 if (p->p_flag & P_INMEM) {
  743                                         setrunqueue(p);
  744                                         maybe_resched(p);
  745                                 } else {
  746                                         p->p_flag |= P_SWAPINREQ;
  747                                         wakeup((caddr_t)&proc0);
  748                                 }
  749                                 /* END INLINE EXPANSION */
  750                                 goto restart;
  751                         }
  752                 }
  753         }
  754         splx(s);
  755 }
  756 
  757 /*
  758  * Make a process sleeping on the specified identifier runnable.
  759  * May wake more than one process if a target process is currently
  760  * swapped out.
  761  */
  762 void
  763 wakeup_one(ident)
  764         register void *ident;
  765 {
  766         register struct slpquehead *qp;
  767         register struct proc *p;
  768         struct proc *np;
  769         int s;
  770 
  771         s = splhigh();
  772         qp = &slpque[LOOKUP(ident)];
  773 
  774 restart:
  775         for (p = TAILQ_FIRST(qp); p != NULL; p = np) {
  776                 np = TAILQ_NEXT(p, p_procq);
  777                 if (p->p_wchan == ident) {
  778                         TAILQ_REMOVE(qp, p, p_procq);
  779                         p->p_wchan = 0;
  780                         if (p->p_stat == SSLEEP) {
  781                                 /* OPTIMIZED EXPANSION OF setrunnable(p); */
  782                                 if (p->p_slptime > 1)
  783                                         updatepri(p);
  784                                 p->p_slptime = 0;
  785                                 p->p_stat = SRUN;
  786                                 if (p->p_flag & P_INMEM) {
  787                                         setrunqueue(p);
  788                                         maybe_resched(p);
  789                                         break;
  790                                 } else {
  791                                         p->p_flag |= P_SWAPINREQ;
  792                                         wakeup((caddr_t)&proc0);
  793                                 }
  794                                 /* END INLINE EXPANSION */
  795                                 goto restart;
  796                         }
  797                 }
  798         }
  799         splx(s);
  800 }
  801 
  802 /*
  803  * The machine independent parts of mi_switch().
  804  * Must be called at splstatclock() or higher.
  805  */
  806 void
  807 mi_switch()
  808 {
  809         struct timeval new_switchtime;
  810         register struct proc *p = curproc;      /* XXX */
  811         register struct rlimit *rlim;
  812         int x;
  813 
  814         /*
  815          * XXX this spl is almost unnecessary.  It is partly to allow for
  816          * sloppy callers that don't do it (issignal() via CURSIG() is the
  817          * main offender).  It is partly to work around a bug in the i386
  818          * cpu_switch() (the ipl is not preserved).  We ran for years
  819          * without it.  I think there was only a interrupt latency problem.
  820          * The main caller, tsleep(), does an splx() a couple of instructions
  821          * after calling here.  The buggy caller, issignal(), usually calls
  822          * here at spl0() and sometimes returns at splhigh().  The process
  823          * then runs for a little too long at splhigh().  The ipl gets fixed
  824          * when the process returns to user mode (or earlier).
  825          *
  826          * It would probably be better to always call here at spl0(). Callers
  827          * are prepared to give up control to another process, so they must
  828          * be prepared to be interrupted.  The clock stuff here may not
  829          * actually need splstatclock().
  830          */
  831         x = splstatclock();
  832 
  833 #ifdef SIMPLELOCK_DEBUG
  834         if (p->p_simple_locks)
  835                 printf("sleep: holding simple lock\n");
  836 #endif
  837         /*
  838          * Compute the amount of time during which the current
  839          * process was running, and add that to its total so far.
  840          */
  841         microuptime(&new_switchtime);
  842         if (timevalcmp(&new_switchtime, &switchtime, <)) {
  843                 printf("microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n",
  844                     switchtime.tv_sec, switchtime.tv_usec, 
  845                     new_switchtime.tv_sec, new_switchtime.tv_usec);
  846                 new_switchtime = switchtime;
  847         } else {
  848                 p->p_runtime += (new_switchtime.tv_usec - switchtime.tv_usec) +
  849                     (new_switchtime.tv_sec - switchtime.tv_sec) * (int64_t)1000000;
  850         }
  851 
  852         /*
  853          * Check if the process exceeds its cpu resource allocation.
  854          * If over max, kill it.
  855          */
  856         if (p->p_stat != SZOMB && p->p_limit->p_cpulimit != RLIM_INFINITY &&
  857             p->p_runtime > p->p_limit->p_cpulimit) {
  858                 rlim = &p->p_rlimit[RLIMIT_CPU];
  859                 if (p->p_runtime / (rlim_t)1000000 >= rlim->rlim_max) {
  860                         killproc(p, "exceeded maximum CPU limit");
  861                 } else {
  862                         psignal(p, SIGXCPU);
  863                         if (rlim->rlim_cur < rlim->rlim_max) {
  864                                 /* XXX: we should make a private copy */
  865                                 rlim->rlim_cur += 5;
  866                         }
  867                 }
  868         }
  869 
  870         /*
  871          * Pick a new current process and record its start time.
  872          */
  873         cnt.v_swtch++;
  874         switchtime = new_switchtime;
  875         cpu_switch(p);
  876         if (switchtime.tv_sec == 0)
  877                 microuptime(&switchtime);
  878         switchticks = ticks;
  879 
  880         splx(x);
  881 }
  882 
  883 /*
  884  * Change process state to be runnable,
  885  * placing it on the run queue if it is in memory,
  886  * and awakening the swapper if it isn't in memory.
  887  */
  888 void
  889 setrunnable(p)
  890         register struct proc *p;
  891 {
  892         register int s;
  893 
  894         s = splhigh();
  895         switch (p->p_stat) {
  896         case 0:
  897         case SRUN:
  898         case SZOMB:
  899         default:
  900                 panic("setrunnable");
  901         case SSTOP:
  902         case SSLEEP:
  903                 unsleep(p);             /* e.g. when sending signals */
  904                 break;
  905 
  906         case SIDL:
  907                 break;
  908         }
  909         p->p_stat = SRUN;
  910         if (p->p_flag & P_INMEM)
  911                 setrunqueue(p);
  912         splx(s);
  913         if (p->p_slptime > 1)
  914                 updatepri(p);
  915         p->p_slptime = 0;
  916         if ((p->p_flag & P_INMEM) == 0) {
  917                 p->p_flag |= P_SWAPINREQ;
  918                 wakeup((caddr_t)&proc0);
  919         }
  920         else
  921                 maybe_resched(p);
  922 }
  923 
  924 /*
  925  * Compute the priority of a process when running in user mode.
  926  * Arrange to reschedule if the resulting priority is better
  927  * than that of the current process.
  928  */
  929 void
  930 resetpriority(p)
  931         register struct proc *p;
  932 {
  933         register unsigned int newpriority;
  934 
  935         if (p->p_rtprio.type == RTP_PRIO_NORMAL) {
  936                 newpriority = PUSER + p->p_estcpu / INVERSE_ESTCPU_WEIGHT +
  937                     NICE_WEIGHT * p->p_nice;
  938                 newpriority = min(newpriority, MAXPRI);
  939                 p->p_usrpri = newpriority;
  940         }
  941         maybe_resched(p);
  942 }
  943 
  944 /*
  945  * Compute a tenex style load average of a quantity on
  946  * 1, 5 and 15 minute intervals.
  947  */
  948 static void
  949 loadav(void *arg)
  950 {
  951         int i, nrun;
  952         struct loadavg *avg;
  953         struct proc *p;
  954 
  955         avg = &averunnable;
  956         nrun = 0;
  957         LIST_FOREACH(p, &allproc, p_list) {
  958                 switch (p->p_stat) {
  959                 case SRUN:
  960                 case SIDL:
  961                         nrun++;
  962                 }
  963         }
  964         for (i = 0; i < 3; i++)
  965                 avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
  966                     nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
  967 
  968         /*
  969          * Schedule the next update to occur after 5 seconds, but add a
  970          * random variation to avoid synchronisation with processes that
  971          * run at regular intervals.
  972          */
  973         callout_reset(&loadav_callout, hz * 4 + (int)(random() % (hz * 2 + 1)),
  974             loadav, NULL);
  975 }
  976 
  977 /* ARGSUSED */
  978 static void
  979 sched_setup(dummy)
  980         void *dummy;
  981 {
  982 
  983         callout_init(&loadav_callout);
  984 
  985         /* Kick off timeout driven events by calling first time. */
  986         roundrobin(NULL);
  987         schedcpu(NULL);
  988         loadav(NULL);
  989 }
  990 
  991 /*
  992  * We adjust the priority of the current process.  The priority of
  993  * a process gets worse as it accumulates CPU time.  The cpu usage
  994  * estimator (p_estcpu) is increased here.  resetpriority() will
  995  * compute a different priority each time p_estcpu increases by
  996  * INVERSE_ESTCPU_WEIGHT
  997  * (until MAXPRI is reached).  The cpu usage estimator ramps up
  998  * quite quickly when the process is running (linearly), and decays
  999  * away exponentially, at a rate which is proportionally slower when
 1000  * the system is busy.  The basic principle is that the system will
 1001  * 90% forget that the process used a lot of CPU time in 5 * loadav
 1002  * seconds.  This causes the system to favor processes which haven't
 1003  * run much recently, and to round-robin among other processes.
 1004  */
 1005 void
 1006 schedclock(p)
 1007         struct proc *p;
 1008 {
 1009 
 1010         p->p_cpticks++;
 1011         p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
 1012         if ((p->p_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) {
 1013                 resetpriority(p);
 1014                 if (p->p_priority >= PUSER)
 1015                         p->p_priority = p->p_usrpri;
 1016         }
 1017 }
Cache object: 5823f10d334248e1e4acad5cd6d1b467
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/kern_synch.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_synch.c