FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_synch.c
1 /* $NetBSD: kern_synch.c,v 1.353 2022/12/05 15:47:14 martin Exp $ */
2
3 /*-
4 * Copyright (c) 1999, 2000, 2004, 2006, 2007, 2008, 2009, 2019, 2020
5 * The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10 * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and
11 * Daniel Sieger.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 /*-
36 * Copyright (c) 1982, 1986, 1990, 1991, 1993
37 * The Regents of the University of California. All rights reserved.
38 * (c) UNIX System Laboratories, Inc.
39 * All or some portions of this file are derived from material licensed
40 * to the University of California by American Telephone and Telegraph
41 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
42 * the permission of UNIX System Laboratories, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 * 3. Neither the name of the University nor the names of its contributors
53 * may be used to endorse or promote products derived from this software
54 * without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 * SUCH DAMAGE.
67 *
68 * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95
69 */
70
71 #include <sys/cdefs.h>
72 __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.353 2022/12/05 15:47:14 martin Exp $");
73
74 #include "opt_kstack.h"
75 #include "opt_dtrace.h"
76
77 #define __MUTEX_PRIVATE
78
79 #include <sys/param.h>
80 #include <sys/systm.h>
81 #include <sys/proc.h>
82 #include <sys/kernel.h>
83 #include <sys/cpu.h>
84 #include <sys/pserialize.h>
85 #include <sys/resource.h>
86 #include <sys/resourcevar.h>
87 #include <sys/rwlock.h>
88 #include <sys/sched.h>
89 #include <sys/syscall_stats.h>
90 #include <sys/sleepq.h>
91 #include <sys/lockdebug.h>
92 #include <sys/evcnt.h>
93 #include <sys/intr.h>
94 #include <sys/lwpctl.h>
95 #include <sys/atomic.h>
96 #include <sys/syslog.h>
97
98 #include <uvm/uvm_extern.h>
99
100 #include <dev/lockstat.h>
101
102 #include <sys/dtrace_bsd.h>
103 int dtrace_vtime_active = 0;
104 dtrace_vtime_switch_func_t dtrace_vtime_switch_func;
105
106 static void sched_unsleep(struct lwp *, bool);
107 static void sched_changepri(struct lwp *, pri_t);
108 static void sched_lendpri(struct lwp *, pri_t);
109
110 syncobj_t sleep_syncobj = {
111 .sobj_flag = SOBJ_SLEEPQ_SORTED,
112 .sobj_unsleep = sleepq_unsleep,
113 .sobj_changepri = sleepq_changepri,
114 .sobj_lendpri = sleepq_lendpri,
115 .sobj_owner = syncobj_noowner,
116 };
117
118 syncobj_t sched_syncobj = {
119 .sobj_flag = SOBJ_SLEEPQ_SORTED,
120 .sobj_unsleep = sched_unsleep,
121 .sobj_changepri = sched_changepri,
122 .sobj_lendpri = sched_lendpri,
123 .sobj_owner = syncobj_noowner,
124 };
125
126 syncobj_t kpause_syncobj = {
127 .sobj_flag = SOBJ_SLEEPQ_NULL,
128 .sobj_unsleep = sleepq_unsleep,
129 .sobj_changepri = sleepq_changepri,
130 .sobj_lendpri = sleepq_lendpri,
131 .sobj_owner = syncobj_noowner,
132 };
133
134 /* "Lightning bolt": once a second sleep address. */
135 kcondvar_t lbolt __cacheline_aligned;
136
137 u_int sched_pstats_ticks __cacheline_aligned;
138
139 /* Preemption event counters. */
140 static struct evcnt kpreempt_ev_crit __cacheline_aligned;
141 static struct evcnt kpreempt_ev_klock __cacheline_aligned;
142 static struct evcnt kpreempt_ev_immed __cacheline_aligned;
143
144 void
145 synch_init(void)
146 {
147
148 cv_init(&lbolt, "lbolt");
149
150 evcnt_attach_dynamic(&kpreempt_ev_crit, EVCNT_TYPE_MISC, NULL,
151 "kpreempt", "defer: critical section");
152 evcnt_attach_dynamic(&kpreempt_ev_klock, EVCNT_TYPE_MISC, NULL,
153 "kpreempt", "defer: kernel_lock");
154 evcnt_attach_dynamic(&kpreempt_ev_immed, EVCNT_TYPE_MISC, NULL,
155 "kpreempt", "immediate");
156 }
157
158 /*
159 * OBSOLETE INTERFACE
160 *
161 * General sleep call. Suspends the current LWP until a wakeup is
162 * performed on the specified identifier. The LWP will then be made
163 * runnable with the specified priority. Sleeps at most timo/hz seconds (0
164 * means no timeout). If pri includes PCATCH flag, signals are checked
165 * before and after sleeping, else signals are not checked. Returns 0 if
166 * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a
167 * signal needs to be delivered, ERESTART is returned if the current system
168 * call should be restarted if possible, and EINTR is returned if the system
169 call should be interrupted by the signal.
170 */
171 int
172 tsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo)
173 {
174 struct lwp *l = curlwp;
175 sleepq_t *sq;
176 kmutex_t *mp;
177 bool catch_p;
178
179 KASSERT((l->l_pflag & LP_INTR) == 0);
180 KASSERT(ident != &lbolt);
181
182 if (sleepq_dontsleep(l)) {
183 (void)sleepq_abort(NULL, 0);
184 return 0;
185 }
186
187 l->l_kpriority = true;
188 catch_p = priority & PCATCH;
189 sq = sleeptab_lookup(&sleeptab, ident, &mp);
190 sleepq_enter(sq, l, mp);
191 sleepq_enqueue(sq, ident, wmesg, &sleep_syncobj, catch_p);
192 return sleepq_block(timo, catch_p, &sleep_syncobj);
193 }
194
195 int
196 mtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
197 kmutex_t *mtx)
198 {
199 struct lwp *l = curlwp;
200 sleepq_t *sq;
201 kmutex_t *mp;
202 bool catch_p;
203 int error;
204
205 KASSERT((l->l_pflag & LP_INTR) == 0);
206 KASSERT(ident != &lbolt);
207
208 if (sleepq_dontsleep(l)) {
209 (void)sleepq_abort(mtx, (priority & PNORELOCK) != 0);
210 return 0;
211 }
212
213 l->l_kpriority = true;
214 catch_p = priority & PCATCH;
215 sq = sleeptab_lookup(&sleeptab, ident, &mp);
216 sleepq_enter(sq, l, mp);
217 sleepq_enqueue(sq, ident, wmesg, &sleep_syncobj, catch_p);
218 mutex_exit(mtx);
219 error = sleepq_block(timo, catch_p, &sleep_syncobj);
220
221 if ((priority & PNORELOCK) == 0)
222 mutex_enter(mtx);
223
224 return error;
225 }
226
227 /*
228 * General sleep call for situations where a wake-up is not expected.
229 */
230 int
231 kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
232 {
233 struct lwp *l = curlwp;
234 int error;
235
236 KASSERT(!(timo == 0 && intr == false));
237
238 if (sleepq_dontsleep(l))
239 return sleepq_abort(NULL, 0);
240
241 if (mtx != NULL)
242 mutex_exit(mtx);
243 l->l_kpriority = true;
244 lwp_lock(l);
245 KERNEL_UNLOCK_ALL(NULL, &l->l_biglocks);
246 sleepq_enqueue(NULL, l, wmesg, &kpause_syncobj, intr);
247 error = sleepq_block(timo, intr, &kpause_syncobj);
248 if (mtx != NULL)
249 mutex_enter(mtx);
250
251 return error;
252 }
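
/*
 * A minimal kpause() sketch: sleep for roughly 100 ms with no wakeup
 * expected.  mstohz() converts milliseconds to ticks; the "mydelay"
 * wait message is arbitrary.  Passing a held kmutex instead of NULL
 * makes kpause() drop and re-take it around the sleep.
 *
 *	(void)kpause("mydelay", false, mstohz(100), NULL);
 */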
253
254 /*
255 * OBSOLETE INTERFACE
256 *
257 * Make all LWPs sleeping on the specified identifier runnable.
258 */
259 void
260 wakeup(wchan_t ident)
261 {
262 sleepq_t *sq;
263 kmutex_t *mp;
264
265 if (__predict_false(cold))
266 return;
267
268 sq = sleeptab_lookup(&sleeptab, ident, &mp);
269 sleepq_wake(sq, ident, (u_int)-1, mp);
270 }
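
/*
 * Usage sketch for the obsolete tsleep()/mtsleep()/wakeup() interfaces
 * above.  The softc "sc", its sc_lock/sc_busy fields and the "mydev"
 * wait message are hypothetical; new code should use condition
 * variables instead.  On the waiting side:
 *
 *	mutex_enter(&sc->sc_lock);
 *	while (sc->sc_busy) {
 *		error = mtsleep(sc, PZERO | PCATCH, "mydev", 0,
 *		    &sc->sc_lock);
 *		if (error != 0)
 *			break;
 *	}
 *	mutex_exit(&sc->sc_lock);
 *
 * where error is EINTR or ERESTART if a signal interrupted the wait.
 * On the waking side, with sc_lock held:
 *
 *	sc->sc_busy = false;
 *	wakeup(sc);
 */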
271
272 /*
273 * General yield call. Puts the current LWP back on its run queue and
274 * performs a context switch.
275 */
276 void
277 yield(void)
278 {
279 struct lwp *l = curlwp;
280
281 KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
282 lwp_lock(l);
283
284 KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock));
285 KASSERT(l->l_stat == LSONPROC);
286
287 /* Voluntary - ditch kpriority boost. */
288 l->l_kpriority = false;
289 spc_lock(l->l_cpu);
290 mi_switch(l);
291 KERNEL_LOCK(l->l_biglocks, l);
292 }
293
294 /*
295 * General preemption call. Puts the current LWP back on its run queue
296 * and performs an involuntary context switch. Different from yield()
297 * in that:
298 *
299 * - It's counted differently (involuntary vs. voluntary).
300 * - Realtime threads go to the head of their runqueue vs. tail for yield().
301 * - Priority boost is retained unless LWP has exceeded timeslice.
302 */
303 void
304 preempt(void)
305 {
306 struct lwp *l = curlwp;
307
308 KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
309 lwp_lock(l);
310
311 KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock));
312 KASSERT(l->l_stat == LSONPROC);
313
314 spc_lock(l->l_cpu);
315 /* Involuntary - keep kpriority boost unless a CPU hog. */
316 if ((l->l_cpu->ci_schedstate.spc_flags & SPCF_SHOULDYIELD) != 0) {
317 l->l_kpriority = false;
318 }
319 l->l_pflag |= LP_PREEMPTING;
320 mi_switch(l);
321 KERNEL_LOCK(l->l_biglocks, l);
322 }
323
324 /*
325 * Return true if the current LWP should yield the processor. Intended to
326 * be used by long-running code in kernel.
327 */
328 inline bool
329 preempt_needed(void)
330 {
331 lwp_t *l = curlwp;
332 int needed;
333
334 KPREEMPT_DISABLE(l);
335 needed = l->l_cpu->ci_want_resched;
336 KPREEMPT_ENABLE(l);
337
338 return (needed != 0);
339 }
340
341 /*
342 * A breathing point for long running code in kernel.
343 */
344 void
345 preempt_point(void)
346 {
347
348 if (__predict_false(preempt_needed())) {
349 preempt();
350 }
351 }
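
/*
 * Typical use of preempt_needed()/preempt_point() in long-running
 * kernel code (the loop body shown is hypothetical):
 *
 *	for (i = 0; i < nitems; i++) {
 *		process_item(&items[i]);
 *		preempt_point();
 *	}
 *
 * or, where the caller wants control over the switch:
 *
 *	if (preempt_needed())
 *		preempt();
 */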
352
353 /*
354 * Handle a request made by another agent to preempt the current LWP
355 * in-kernel. Usually called when l_dopreempt may be non-zero.
356 *
357 * Character addresses for lockstat only.
358 */
359 static char kpreempt_is_disabled;
360 static char kernel_lock_held;
361 static char is_softint_lwp;
362 static char spl_is_raised;
363
364 bool
365 kpreempt(uintptr_t where)
366 {
367 uintptr_t failed;
368 lwp_t *l;
369 int s, dop, lsflag;
370
371 l = curlwp;
372 failed = 0;
373 while ((dop = l->l_dopreempt) != 0) {
374 if (l->l_stat != LSONPROC) {
375 /*
376 * About to block (or die), let it happen.
377 * Doesn't really count as "preemption has
378 * been blocked", since we're going to
379 * context switch.
380 */
381 atomic_swap_uint(&l->l_dopreempt, 0);
382 return true;
383 }
384 KASSERT((l->l_flag & LW_IDLE) == 0);
385 if (__predict_false(l->l_nopreempt != 0)) {
386 /* LWP holds preemption disabled, explicitly. */
387 if ((dop & DOPREEMPT_COUNTED) == 0) {
388 kpreempt_ev_crit.ev_count++;
389 }
390 failed = (uintptr_t)&kpreempt_is_disabled;
391 break;
392 }
393 if (__predict_false((l->l_pflag & LP_INTR) != 0)) {
394 /* Can't preempt soft interrupts yet. */
395 atomic_swap_uint(&l->l_dopreempt, 0);
396 failed = (uintptr_t)&is_softint_lwp;
397 break;
398 }
399 s = splsched();
400 if (__predict_false(l->l_blcnt != 0 ||
401 curcpu()->ci_biglock_wanted != NULL)) {
402 /* Hold or want kernel_lock, code is not MT safe. */
403 splx(s);
404 if ((dop & DOPREEMPT_COUNTED) == 0) {
405 kpreempt_ev_klock.ev_count++;
406 }
407 failed = (uintptr_t)&kernel_lock_held;
408 break;
409 }
410 if (__predict_false(!cpu_kpreempt_enter(where, s))) {
411 /*
412 * It may be that the IPL is too high.
413 * cpu_kpreempt_enter() can schedule an
414 * interrupt to retry later.
415 */
416 splx(s);
417 failed = (uintptr_t)&spl_is_raised;
418 break;
419 }
420 /* Do it! */
421 if (__predict_true((dop & DOPREEMPT_COUNTED) == 0)) {
422 kpreempt_ev_immed.ev_count++;
423 }
424 lwp_lock(l);
425 /* Involuntary - keep kpriority boost. */
426 l->l_pflag |= LP_PREEMPTING;
427 spc_lock(l->l_cpu);
428 mi_switch(l);
429 l->l_nopreempt++;
430 splx(s);
431
432 /* Take care of any MD cleanup. */
433 cpu_kpreempt_exit(where);
434 l->l_nopreempt--;
435 }
436
437 if (__predict_true(!failed)) {
438 return false;
439 }
440
441 /* Record preemption failure for reporting via lockstat. */
442 atomic_or_uint(&l->l_dopreempt, DOPREEMPT_COUNTED);
443 lsflag = 0;
444 LOCKSTAT_ENTER(lsflag);
445 if (__predict_false(lsflag)) {
446 if (where == 0) {
447 where = (uintptr_t)__builtin_return_address(0);
448 }
449 /* Preemption is on, might recurse, so make it atomic. */
450 if (atomic_cas_ptr_ni((void *)&l->l_pfailaddr, NULL,
451 (void *)where) == NULL) {
452 LOCKSTAT_START_TIMER(lsflag, l->l_pfailtime);
453 l->l_pfaillock = failed;
454 }
455 }
456 LOCKSTAT_EXIT(lsflag);
457 return true;
458 }
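
/*
 * A direct call is rare; kpreempt() is normally reached when a
 * deferred preemption request is noticed as preemption is re-enabled.
 * Sketch of that idiom (a "where" of zero means "use the return
 * address for lockstat"):
 *
 *	if (__predict_false(curlwp->l_dopreempt != 0))
 *		kpreempt(0);
 */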
459
460 /*
461 * Return true if preemption is explicitly disabled.
462 */
463 bool
464 kpreempt_disabled(void)
465 {
466 const lwp_t *l = curlwp;
467
468 return l->l_nopreempt != 0 || l->l_stat == LSZOMB ||
469 (l->l_flag & LW_IDLE) != 0 || (l->l_pflag & LP_INTR) != 0 ||
470 cpu_kpreempt_disabled();
471 }
472
473 /*
474 * Disable kernel preemption.
475 */
476 void
477 kpreempt_disable(void)
478 {
479
480 KPREEMPT_DISABLE(curlwp);
481 }
482
483 /*
484 * Reenable kernel preemption.
485 */
486 void
487 kpreempt_enable(void)
488 {
489
490 KPREEMPT_ENABLE(curlwp);
491 }
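
/*
 * Sketch of the usual disable/enable bracket: with preemption held
 * off the LWP cannot migrate, so curcpu() stays valid across the
 * critical section.  The per-CPU field shown is hypothetical.
 *
 *	kpreempt_disable();
 *	ci = curcpu();
 *	ci->ci_somecounter++;
 *	kpreempt_enable();
 */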
492
493 /*
494 * Compute the amount of time during which the current lwp was running.
495 *
496 * - update l_rtime unless it's an idle lwp.
497 */
498
499 void
500 updatertime(lwp_t *l, const struct bintime *now)
501 {
502
503 if (__predict_false(l->l_flag & LW_IDLE))
504 return;
505
506 /* rtime += now - stime */
507 bintime_add(&l->l_rtime, now);
508 bintime_sub(&l->l_rtime, &l->l_stime);
509 }
510
511 /*
512 * Select the next LWP to run on the current CPU.
513 */
514 static inline lwp_t *
515 nextlwp(struct cpu_info *ci, struct schedstate_percpu *spc)
516 {
517 lwp_t *newl;
518
519 /*
520 * Let sched_nextlwp() select the LWP to run the CPU next.
521 * If no LWP is runnable, select the idle LWP.
522 *
523 * On arrival here LWPs on a run queue are locked by spc_mutex which
524 * is currently held. Idle LWPs are always locked by spc_lwplock,
525 * which may or may not be held here. On exit from this code block,
526 * in all cases newl is locked by spc_lwplock.
527 */
528 newl = sched_nextlwp();
529 if (newl != NULL) {
530 sched_dequeue(newl);
531 KASSERT(lwp_locked(newl, spc->spc_mutex));
532 KASSERT(newl->l_cpu == ci);
533 newl->l_stat = LSONPROC;
534 newl->l_pflag |= LP_RUNNING;
535 spc->spc_curpriority = lwp_eprio(newl);
536 spc->spc_flags &= ~(SPCF_SWITCHCLEAR | SPCF_IDLE);
537 lwp_setlock(newl, spc->spc_lwplock);
538 } else {
539 /*
540 * The idle LWP does not get set to LSONPROC, because
541 * otherwise it screws up the output from top(1) etc.
542 */
543 newl = ci->ci_data.cpu_idlelwp;
544 newl->l_pflag |= LP_RUNNING;
545 spc->spc_curpriority = PRI_IDLE;
546 spc->spc_flags = (spc->spc_flags & ~SPCF_SWITCHCLEAR) |
547 SPCF_IDLE;
548 }
549
550 /*
551 * Only clear want_resched if there are no pending (slow) software
552 * interrupts. We can do this without an atomic, because no new
553 * LWPs can appear in the queue due to our hold on spc_mutex, and
554 * the update to ci_want_resched will become globally visible before
555 * the release of spc_mutex becomes globally visible.
556 */
557 if (ci->ci_data.cpu_softints == 0)
558 ci->ci_want_resched = 0;
559
560 return newl;
561 }
562
563 /*
564 * The machine independent parts of context switch.
565 *
566 * NOTE: l->l_cpu is not changed in this routine, because an LWP never
567 * changes its own l_cpu (that would screw up curcpu on many ports and could
568 * cause all kinds of other evil stuff). l_cpu is always changed by some
569 * other actor, when it's known the LWP is not running (the LP_RUNNING flag
570 * is checked under lock).
571 */
572 void
573 mi_switch(lwp_t *l)
574 {
575 struct cpu_info *ci;
576 struct schedstate_percpu *spc;
577 struct lwp *newl;
578 kmutex_t *lock;
579 int oldspl;
580 struct bintime bt;
581 bool returning;
582
583 KASSERT(lwp_locked(l, NULL));
584 KASSERT(kpreempt_disabled());
585 KASSERT(mutex_owned(curcpu()->ci_schedstate.spc_mutex));
586 KASSERTMSG(l->l_blcnt == 0, "kernel_lock leaked");
587
588 kstack_check_magic(l);
589
590 binuptime(&bt);
591
592 KASSERTMSG(l == curlwp, "l %p curlwp %p", l, curlwp);
593 KASSERT((l->l_pflag & LP_RUNNING) != 0);
594 KASSERT(l->l_cpu == curcpu() || l->l_stat == LSRUN);
595 ci = curcpu();
596 spc = &ci->ci_schedstate;
597 returning = false;
598 newl = NULL;
599
600 /*
601 * If we have been asked to switch to a specific LWP, then there
602 * is no need to inspect the run queues. If a soft interrupt is
603 * blocking, then return to the interrupted thread without adjusting
604 * VM context or its start time: neither have been changed in order
605 * to take the interrupt.
606 */
607 if (l->l_switchto != NULL) {
608 if ((l->l_pflag & LP_INTR) != 0) {
609 returning = true;
610 softint_block(l);
611 if ((l->l_pflag & LP_TIMEINTR) != 0)
612 updatertime(l, &bt);
613 }
614 newl = l->l_switchto;
615 l->l_switchto = NULL;
616 }
617 #ifndef __HAVE_FAST_SOFTINTS
618 else if (ci->ci_data.cpu_softints != 0) {
619 /* There are pending soft interrupts, so pick one. */
620 newl = softint_picklwp();
621 newl->l_stat = LSONPROC;
622 newl->l_pflag |= LP_RUNNING;
623 }
624 #endif /* !__HAVE_FAST_SOFTINTS */
625
626 /*
627 * If on the CPU and we have gotten this far, then we must yield.
628 */
629 if (l->l_stat == LSONPROC && l != newl) {
630 KASSERT(lwp_locked(l, spc->spc_lwplock));
631 KASSERT((l->l_flag & LW_IDLE) == 0);
632 l->l_stat = LSRUN;
633 lwp_setlock(l, spc->spc_mutex);
634 sched_enqueue(l);
635 sched_preempted(l);
636
637 /*
638 * Handle migration. Note that "migrating LWP" may
639 * be reset here, if interrupt/preemption happens
640 * early in idle LWP.
641 */
642 if (l->l_target_cpu != NULL && (l->l_pflag & LP_BOUND) == 0) {
643 KASSERT((l->l_pflag & LP_INTR) == 0);
644 spc->spc_migrating = l;
645 }
646 }
647
648 /* Pick new LWP to run. */
649 if (newl == NULL) {
650 newl = nextlwp(ci, spc);
651 }
652
653 /* Items that must be updated with the CPU locked. */
654 if (!returning) {
655 /* Count time spent in current system call */
656 SYSCALL_TIME_SLEEP(l);
657
658 updatertime(l, &bt);
659
660 /* Update the new LWP's start time. */
661 newl->l_stime = bt;
662
663 /*
664 * ci_curlwp changes when a fast soft interrupt occurs.
665 * We use ci_onproc to keep track of which kernel or
666 * user thread is running 'underneath' the software
667 * interrupt. This is important for time accounting,
668 * itimers and forcing user threads to preempt (aston).
669 */
670 ci->ci_onproc = newl;
671 }
672
673 /*
674 * Preemption related tasks. Must be done holding spc_mutex. Clear
675 * l_dopreempt without an atomic - it's only ever set non-zero by
676 * sched_resched_cpu() which also holds spc_mutex, and only ever
677 * cleared by the LWP itself (us) with atomics when not under lock.
678 */
679 l->l_dopreempt = 0;
680 if (__predict_false(l->l_pfailaddr != 0)) {
681 LOCKSTAT_FLAG(lsflag);
682 LOCKSTAT_ENTER(lsflag);
683 LOCKSTAT_STOP_TIMER(lsflag, l->l_pfailtime);
684 LOCKSTAT_EVENT_RA(lsflag, l->l_pfaillock, LB_NOPREEMPT|LB_SPIN,
685 1, l->l_pfailtime, l->l_pfailaddr);
686 LOCKSTAT_EXIT(lsflag);
687 l->l_pfailtime = 0;
688 l->l_pfaillock = 0;
689 l->l_pfailaddr = 0;
690 }
691
692 if (l != newl) {
693 struct lwp *prevlwp;
694
695 /* Release all locks, but leave the current LWP locked */
696 if (l->l_mutex == spc->spc_mutex) {
697 /*
698 * Drop spc_lwplock, if the current LWP has been moved
699 * to the run queue (it is now locked by spc_mutex).
700 */
701 mutex_spin_exit(spc->spc_lwplock);
702 } else {
703 /*
704 * Otherwise, drop the spc_mutex, we are done with the
705 * run queues.
706 */
707 mutex_spin_exit(spc->spc_mutex);
708 }
709
710 /* We're down to only one lock, so do debug checks. */
711 LOCKDEBUG_BARRIER(l->l_mutex, 1);
712
713 /* Count the context switch. */
714 CPU_COUNT(CPU_COUNT_NSWTCH, 1);
715 l->l_ncsw++;
716 if ((l->l_pflag & LP_PREEMPTING) != 0) {
717 l->l_nivcsw++;
718 l->l_pflag &= ~LP_PREEMPTING;
719 }
720
721 /*
722 * Increase the count of spin-mutexes before the release
723 * of the last lock - we must remain at IPL_SCHED after
724 * releasing the lock.
725 */
726 KASSERTMSG(ci->ci_mtx_count == -1,
727 "%s: cpu%u: ci_mtx_count (%d) != -1 "
728 "(block with spin-mutex held)",
729 __func__, cpu_index(ci), ci->ci_mtx_count);
730 oldspl = MUTEX_SPIN_OLDSPL(ci);
731 ci->ci_mtx_count = -2;
732
733 /* Update status for lwpctl, if present. */
734 if (l->l_lwpctl != NULL) {
735 l->l_lwpctl->lc_curcpu = (l->l_stat == LSZOMB ?
736 LWPCTL_CPU_EXITED : LWPCTL_CPU_NONE);
737 }
738
739 /*
740 * If curlwp is a soft interrupt LWP, there's nobody on the
741 * other side to unlock - we're returning into an assembly
742 * trampoline. Unlock now. This is safe because this is a
743 * kernel LWP and is bound to current CPU: the worst anyone
744 * else will do to it, is to put it back onto this CPU's run
745 * queue (and the CPU is busy here right now!).
746 */
747 if (returning) {
748 /* Keep IPL_SCHED after this; MD code will fix up. */
749 l->l_pflag &= ~LP_RUNNING;
750 lwp_unlock(l);
751 } else {
752 /* A normal LWP: save old VM context. */
753 pmap_deactivate(l);
754 }
755
756 /*
757 * If DTrace has set the active vtime enum to anything
758 * other than INACTIVE (0), then it should have set the
759 * function to call.
760 */
761 if (__predict_false(dtrace_vtime_active)) {
762 (*dtrace_vtime_switch_func)(newl);
763 }
764
765 /*
766 * We must ensure not to come here from inside a read section.
767 */
768 KASSERT(pserialize_not_in_read_section());
769
770 /* Switch to the new LWP. */
771 #ifdef MULTIPROCESSOR
772 KASSERT(curlwp == ci->ci_curlwp);
773 #endif
774 KASSERTMSG(l == curlwp, "l %p curlwp %p", l, curlwp);
775 prevlwp = cpu_switchto(l, newl, returning);
776 ci = curcpu();
777 #ifdef MULTIPROCESSOR
778 KASSERT(curlwp == ci->ci_curlwp);
779 #endif
780 KASSERTMSG(l == curlwp, "l %p curlwp %p prevlwp %p",
781 l, curlwp, prevlwp);
782 KASSERT(prevlwp != NULL);
783 KASSERT(l->l_cpu == ci);
784 KASSERT(ci->ci_mtx_count == -2);
785
786 /*
787 * Immediately mark the previous LWP as no longer running
789 * and unlock (to keep lock wait times as short as possible).
789 * We'll still be at IPL_SCHED afterwards. If a zombie,
790 * don't touch after clearing LP_RUNNING as it could be
791 * reaped by another CPU. Issue a memory barrier to ensure
792 * this.
793 *
794 * atomic_store_release matches atomic_load_acquire in
795 * lwp_free.
796 */
797 KASSERT((prevlwp->l_pflag & LP_RUNNING) != 0);
798 lock = prevlwp->l_mutex;
799 if (__predict_false(prevlwp->l_stat == LSZOMB)) {
800 atomic_store_release(&prevlwp->l_pflag,
801 prevlwp->l_pflag & ~LP_RUNNING);
802 } else {
803 prevlwp->l_pflag &= ~LP_RUNNING;
804 }
805 mutex_spin_exit(lock);
806
807 /*
808 * Switched away - we have new curlwp.
809 * Restore VM context and IPL.
810 */
811 pmap_activate(l);
812 pcu_switchpoint(l);
813
814 /* Update status for lwpctl, if present. */
815 if (l->l_lwpctl != NULL) {
816 l->l_lwpctl->lc_curcpu = (int)cpu_index(ci);
817 l->l_lwpctl->lc_pctr++;
818 }
819
820 /*
821 * Normalize the spin mutex count and restore the previous
822 * SPL. Note that, unless the caller disabled preemption,
823 * we can be preempted at any time after this splx().
824 */
825 KASSERT(l->l_cpu == ci);
826 KASSERT(ci->ci_mtx_count == -1);
827 ci->ci_mtx_count = 0;
828 splx(oldspl);
829 } else {
830 /* Nothing to do - just unlock and return. */
831 mutex_spin_exit(spc->spc_mutex);
832 l->l_pflag &= ~LP_PREEMPTING;
833 lwp_unlock(l);
834 }
835
836 KASSERT(l == curlwp);
837 KASSERT(l->l_stat == LSONPROC || (l->l_flag & LW_IDLE) != 0);
838
839 SYSCALL_TIME_WAKEUP(l);
840 LOCKDEBUG_BARRIER(NULL, 1);
841 }
842
843 /*
844 * setrunnable: change LWP state to be runnable, placing it on the run queue.
845 *
846 * Call with the process and LWP locked. Will return with the LWP unlocked.
847 */
848 void
849 setrunnable(struct lwp *l)
850 {
851 struct proc *p = l->l_proc;
852 struct cpu_info *ci;
853 kmutex_t *oldlock;
854
855 KASSERT((l->l_flag & LW_IDLE) == 0);
856 KASSERT((l->l_flag & LW_DBGSUSPEND) == 0);
857 KASSERT(mutex_owned(p->p_lock));
858 KASSERT(lwp_locked(l, NULL));
859 KASSERT(l->l_mutex != l->l_cpu->ci_schedstate.spc_mutex);
860
861 switch (l->l_stat) {
862 case LSSTOP:
863 /*
864 * If we're being traced (possibly because someone attached us
865 * while we were stopped), check for a signal from the debugger.
866 */
867 if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xsig != 0)
868 signotify(l);
869 p->p_nrlwps++;
870 break;
871 case LSSUSPENDED:
872 KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock));
873 l->l_flag &= ~LW_WSUSPEND;
874 p->p_nrlwps++;
875 cv_broadcast(&p->p_lwpcv);
876 break;
877 case LSSLEEP:
878 KASSERT(l->l_wchan != NULL);
879 break;
880 case LSIDL:
881 KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock));
882 break;
883 default:
884 panic("setrunnable: lwp %p state was %d", l, l->l_stat);
885 }
886
887 /*
888 * If the LWP was sleeping, start it again.
889 */
890 if (l->l_wchan != NULL) {
891 l->l_stat = LSSLEEP;
892 /* lwp_unsleep() will release the lock. */
893 lwp_unsleep(l, true);
894 return;
895 }
896
897 /*
898 * If the LWP is still on the CPU, mark it as LSONPROC. It may be
899 * about to call mi_switch(), in which case it will yield.
900 */
901 if ((l->l_pflag & LP_RUNNING) != 0) {
902 l->l_stat = LSONPROC;
903 l->l_slptime = 0;
904 lwp_unlock(l);
905 return;
906 }
907
908 /*
909 * Look for a CPU to run.
910 * Set the LWP runnable.
911 */
912 ci = sched_takecpu(l);
913 l->l_cpu = ci;
914 spc_lock(ci);
915 oldlock = lwp_setlock(l, l->l_cpu->ci_schedstate.spc_mutex);
916 sched_setrunnable(l);
917 l->l_stat = LSRUN;
918 l->l_slptime = 0;
919 sched_enqueue(l);
920 sched_resched_lwp(l, true);
921 /* SPC & LWP now unlocked. */
922 mutex_spin_exit(oldlock);
923 }
924
925 /*
926 * suspendsched:
927 *
928 * Convert all non-LW_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED.
929 */
930 void
931 suspendsched(void)
932 {
933 CPU_INFO_ITERATOR cii;
934 struct cpu_info *ci;
935 struct lwp *l;
936 struct proc *p;
937
938 /*
939 * We do this by process in order not to violate the locking rules.
940 */
941 mutex_enter(&proc_lock);
942 PROCLIST_FOREACH(p, &allproc) {
943 mutex_enter(p->p_lock);
944 if ((p->p_flag & PK_SYSTEM) != 0) {
945 mutex_exit(p->p_lock);
946 continue;
947 }
948
949 if (p->p_stat != SSTOP) {
950 if (p->p_stat != SZOMB && p->p_stat != SDEAD) {
951 p->p_pptr->p_nstopchild++;
952 p->p_waited = 0;
953 }
954 p->p_stat = SSTOP;
955 }
956
957 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
958 if (l == curlwp)
959 continue;
960
961 lwp_lock(l);
962
963 /*
964 * Set LW_WREBOOT so that the LWP will suspend itself
965 * when it tries to return to user mode. We want to
966 * try to get as many LWPs as possible to
967 * the user / kernel boundary, so that they will
968 * release any locks that they hold.
969 */
970 l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);
971
972 if (l->l_stat == LSSLEEP &&
973 (l->l_flag & LW_SINTR) != 0) {
974 /* setrunnable() will release the lock. */
975 setrunnable(l);
976 continue;
977 }
978
979 lwp_unlock(l);
980 }
981
982 mutex_exit(p->p_lock);
983 }
984 mutex_exit(&proc_lock);
985
986 /*
987 * Kick all CPUs to make them preempt any LWPs running in user mode.
988 * They'll trap into the kernel and suspend themselves in userret().
989 *
990 * Unusually, we don't hold any other scheduler object locked, which
991 * would keep preemption off for sched_resched_cpu(), so disable it
992 * explicitly.
993 */
994 kpreempt_disable();
995 for (CPU_INFO_FOREACH(cii, ci)) {
996 spc_lock(ci);
997 sched_resched_cpu(ci, PRI_KERNEL, true);
998 /* spc now unlocked */
999 }
1000 kpreempt_enable();
1001 }
1002
1003 /*
1004 * sched_unsleep:
1005 *
1006 * This is called when the LWP has not been awoken normally but instead
1007 * interrupted: for example, if the sleep timed out. Because of this,
1008 * it's not a valid action for running or idle LWPs.
1009 */
1010 static void
1011 sched_unsleep(struct lwp *l, bool cleanup)
1012 {
1013
1014 lwp_unlock(l);
1015 panic("sched_unsleep");
1016 }
1017
1018 static void
1019 sched_changepri(struct lwp *l, pri_t pri)
1020 {
1021 struct schedstate_percpu *spc;
1022 struct cpu_info *ci;
1023
1024 KASSERT(lwp_locked(l, NULL));
1025
1026 ci = l->l_cpu;
1027 spc = &ci->ci_schedstate;
1028
1029 if (l->l_stat == LSRUN) {
1030 KASSERT(lwp_locked(l, spc->spc_mutex));
1031 sched_dequeue(l);
1032 l->l_priority = pri;
1033 sched_enqueue(l);
1034 sched_resched_lwp(l, false);
1035 } else if (l->l_stat == LSONPROC && l->l_class != SCHED_OTHER) {
1036 /* On priority drop, only evict realtime LWPs. */
1037 KASSERT(lwp_locked(l, spc->spc_lwplock));
1038 l->l_priority = pri;
1039 spc_lock(ci);
1040 sched_resched_cpu(ci, spc->spc_maxpriority, true);
1041 /* spc now unlocked */
1042 } else {
1043 l->l_priority = pri;
1044 }
1045 }
1046
1047 static void
1048 sched_lendpri(struct lwp *l, pri_t pri)
1049 {
1050 struct schedstate_percpu *spc;
1051 struct cpu_info *ci;
1052
1053 KASSERT(lwp_locked(l, NULL));
1054
1055 ci = l->l_cpu;
1056 spc = &ci->ci_schedstate;
1057
1058 if (l->l_stat == LSRUN) {
1059 KASSERT(lwp_locked(l, spc->spc_mutex));
1060 sched_dequeue(l);
1061 l->l_inheritedprio = pri;
1062 l->l_auxprio = MAX(l->l_inheritedprio, l->l_protectprio);
1063 sched_enqueue(l);
1064 sched_resched_lwp(l, false);
1065 } else if (l->l_stat == LSONPROC && l->l_class != SCHED_OTHER) {
1066 /* On priority drop, only evict realtime LWPs. */
1067 KASSERT(lwp_locked(l, spc->spc_lwplock));
1068 l->l_inheritedprio = pri;
1069 l->l_auxprio = MAX(l->l_inheritedprio, l->l_protectprio);
1070 spc_lock(ci);
1071 sched_resched_cpu(ci, spc->spc_maxpriority, true);
1072 /* spc now unlocked */
1073 } else {
1074 l->l_inheritedprio = pri;
1075 l->l_auxprio = MAX(l->l_inheritedprio, l->l_protectprio);
1076 }
1077 }
1078
1079 struct lwp *
1080 syncobj_noowner(wchan_t wchan)
1081 {
1082
1083 return NULL;
1084 }
1085
1086 /* Decay 95% of proc::p_pctcpu in 60 seconds, ccpu = exp(-1/20) */
1087 const fixpt_t ccpu = 0.95122942450071400909 * FSCALE;
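
/*
 * Checking the comment above: sched_pstats() runs once per second
 * (the load average below is sampled every fifth call, matching the
 * "5 second intervals" note), so after 60 seconds the retained
 * fraction of p_pctcpu is exp(-1/20)^60 = exp(-3), roughly 5%, i.e.
 * about 95% has decayed.
 */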
1088
1089 /*
1090 * Constants for averages over 1, 5 and 15 minutes when sampling at
1091 * 5 second intervals.
1092 */
1093 static const fixpt_t cexp[] = {
1094 0.9200444146293232 * FSCALE, /* exp(-1/12) */
1095 0.9834714538216174 * FSCALE, /* exp(-1/60) */
1096 0.9944598480048967 * FSCALE, /* exp(-1/180) */
1097 };
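
/*
 * Derivation: for a T-second average sampled every 5 seconds the decay
 * factor is exp(-5/T), giving exp(-1/12), exp(-1/60) and exp(-1/180)
 * for the 60, 300 and 900 second averages.  Each sample then updates
 * the fixed-point average as
 *
 *	ldavg = (cexp * ldavg + nrun * FSCALE * (FSCALE - cexp)) >> FSHIFT
 *
 * which is exactly what the loop at the end of sched_pstats() does.
 */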
1098
1099 /*
1100 * sched_pstats:
1101 *
1102 * => Update process statistics and check CPU resource allocation.
1103 * => Call scheduler-specific hook to eventually adjust LWP priorities.
1104 * => Compute load average of a quantity on 1, 5 and 15 minute intervals.
1105 */
1106 void
1107 sched_pstats(void)
1108 {
1109 struct loadavg *avg = &averunnable;
1110 const int clkhz = (stathz != 0 ? stathz : hz);
1111 static bool backwards = false;
1112 static u_int lavg_count = 0;
1113 struct proc *p;
1114 int nrun;
1115
1116 sched_pstats_ticks++;
1117 if (++lavg_count >= 5) {
1118 lavg_count = 0;
1119 nrun = 0;
1120 }
1121 mutex_enter(&proc_lock);
1122 PROCLIST_FOREACH(p, &allproc) {
1123 struct lwp *l;
1124 struct rlimit *rlim;
1125 time_t runtm;
1126 int sig;
1127
1128 /* Increment sleep time (if sleeping), ignore overflow. */
1129 mutex_enter(p->p_lock);
1130 runtm = p->p_rtime.sec;
1131 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1132 fixpt_t lpctcpu;
1133 u_int lcpticks;
1134
1135 if (__predict_false((l->l_flag & LW_IDLE) != 0))
1136 continue;
1137 lwp_lock(l);
1138 runtm += l->l_rtime.sec;
1139 l->l_swtime++;
1140 sched_lwp_stats(l);
1141
1142 /* For load average calculation. */
1143 if (__predict_false(lavg_count == 0) &&
1144 (l->l_flag & (LW_SINTR | LW_SYSTEM)) == 0) {
1145 switch (l->l_stat) {
1146 case LSSLEEP:
1147 if (l->l_slptime > 1) {
1148 break;
1149 }
1150 /* FALLTHROUGH */
1151 case LSRUN:
1152 case LSONPROC:
1153 case LSIDL:
1154 nrun++;
1155 }
1156 }
1157 lwp_unlock(l);
1158
1159 l->l_pctcpu = (l->l_pctcpu * ccpu) >> FSHIFT;
1160 if (l->l_slptime != 0)
1161 continue;
1162
1163 lpctcpu = l->l_pctcpu;
1164 lcpticks = atomic_swap_uint(&l->l_cpticks, 0);
1165 lpctcpu += ((FSCALE - ccpu) *
1166 (lcpticks * FSCALE / clkhz)) >> FSHIFT;
1167 l->l_pctcpu = lpctcpu;
1168 }
1169 /* p_pctcpu is calculated only for ps(1). */
1170 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
1171
1172 if (__predict_false(runtm < 0)) {
1173 if (!backwards) {
1174 backwards = true;
1175 printf("WARNING: negative runtime; "
1176 "monotonic clock has gone backwards\n");
1177 }
1178 mutex_exit(p->p_lock);
1179 continue;
1180 }
1181
1182 /*
1183 * Check if the process exceeds its CPU resource allocation.
1184 * If over the hard limit, kill it with SIGKILL.
1185 * If over the soft limit, send SIGXCPU and raise
1186 * the soft limit a little.
1187 */
1188 rlim = &p->p_rlimit[RLIMIT_CPU];
1189 sig = 0;
1190 if (__predict_false(runtm >= rlim->rlim_cur)) {
1191 if (runtm >= rlim->rlim_max) {
1192 sig = SIGKILL;
1193 log(LOG_NOTICE,
1194 "pid %d, command %s, is killed: %s\n",
1195 p->p_pid, p->p_comm, "exceeded RLIMIT_CPU");
1196 uprintf("pid %d, command %s, is killed: %s\n",
1197 p->p_pid, p->p_comm, "exceeded RLIMIT_CPU");
1198 } else {
1199 sig = SIGXCPU;
1200 if (rlim->rlim_cur < rlim->rlim_max)
1201 rlim->rlim_cur += 5;
1202 }
1203 }
1204 mutex_exit(p->p_lock);
1205 if (__predict_false(sig)) {
1206 KASSERT((p->p_flag & PK_SYSTEM) == 0);
1207 psignal(p, sig);
1208 }
1209 }
1210
1211 /* Load average calculation. */
1212 if (__predict_false(lavg_count == 0)) {
1213 int i;
1214 CTASSERT(__arraycount(cexp) == __arraycount(avg->ldavg));
1215 for (i = 0; i < __arraycount(cexp); i++) {
1216 avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
1217 nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
1218 }
1219 }
1220
1221 /* Lightning bolt. */
1222 cv_broadcast(&lbolt);
1223
1224 mutex_exit(&proc_lock);
1225 }