FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_synch.c
1 /* $NetBSD: kern_synch.c,v 1.254.2.6 2009/04/23 17:47:13 snj Exp $ */
2
3 /*-
4 * Copyright (c) 1999, 2000, 2004, 2006, 2007, 2008, 2009
5 * The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10 * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and
11 * Daniel Sieger.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 /*-
36 * Copyright (c) 1982, 1986, 1990, 1991, 1993
37 * The Regents of the University of California. All rights reserved.
38 * (c) UNIX System Laboratories, Inc.
39 * All or some portions of this file are derived from material licensed
40 * to the University of California by American Telephone and Telegraph
41 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
42 * the permission of UNIX System Laboratories, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 * 3. Neither the name of the University nor the names of its contributors
53 * may be used to endorse or promote products derived from this software
54 * without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 * SUCH DAMAGE.
67 *
68 * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95
69 */
70
71 #include <sys/cdefs.h>
72 __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.254.2.6 2009/04/23 17:47:13 snj Exp $");
73
74 #include "opt_kstack.h"
75 #include "opt_perfctrs.h"
76 #include "opt_sa.h"
77
78 #define __MUTEX_PRIVATE
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/proc.h>
83 #include <sys/kernel.h>
84 #if defined(PERFCTRS)
85 #include <sys/pmc.h>
86 #endif
87 #include <sys/cpu.h>
88 #include <sys/resourcevar.h>
89 #include <sys/sched.h>
90 #include <sys/sa.h>
91 #include <sys/savar.h>
92 #include <sys/syscall_stats.h>
93 #include <sys/sleepq.h>
94 #include <sys/lockdebug.h>
95 #include <sys/evcnt.h>
96 #include <sys/intr.h>
97 #include <sys/lwpctl.h>
98 #include <sys/atomic.h>
99 #include <sys/simplelock.h>
100
101 #include <uvm/uvm_extern.h>
102
103 #include <dev/lockstat.h>
104
105 static u_int sched_unsleep(struct lwp *, bool);
106 static void sched_changepri(struct lwp *, pri_t);
107 static void sched_lendpri(struct lwp *, pri_t);
108 static void resched_cpu(struct lwp *);
109
110 syncobj_t sleep_syncobj = {
111 SOBJ_SLEEPQ_SORTED,
112 sleepq_unsleep,
113 sleepq_changepri,
114 sleepq_lendpri,
115 syncobj_noowner,
116 };
117
118 syncobj_t sched_syncobj = {
119 SOBJ_SLEEPQ_SORTED,
120 sched_unsleep,
121 sched_changepri,
122 sched_lendpri,
123 syncobj_noowner,
124 };
125
126 callout_t sched_pstats_ch;
127 unsigned sched_pstats_ticks;
128 kcondvar_t lbolt; /* once a second sleep address */
129
130 /* Preemption event counters */
131 static struct evcnt kpreempt_ev_crit;
132 static struct evcnt kpreempt_ev_klock;
133 static struct evcnt kpreempt_ev_immed;
134
135 /*
136 * During autoconfiguration or after a panic, a sleep will simply lower the
137 * priority briefly to allow interrupts, then return. The priority to be
138 * used (safepri) is machine-dependent, thus this value is initialized and
139 * maintained in the machine-dependent layers. This priority will typically
140 * be 0, or the lowest priority that is safe for use on the interrupt stack;
141 * it can be made higher to block network software interrupts after panics.
142 */
143 int safepri;
144
145 void
146 sched_init(void)
147 {
148
149 cv_init(&lbolt, "lbolt");
150 callout_init(&sched_pstats_ch, CALLOUT_MPSAFE);
151 callout_setfunc(&sched_pstats_ch, sched_pstats, NULL);
152
153 evcnt_attach_dynamic(&kpreempt_ev_crit, EVCNT_TYPE_MISC, NULL,
154 "kpreempt", "defer: critical section");
155 evcnt_attach_dynamic(&kpreempt_ev_klock, EVCNT_TYPE_MISC, NULL,
156 "kpreempt", "defer: kernel_lock");
157 evcnt_attach_dynamic(&kpreempt_ev_immed, EVCNT_TYPE_MISC, NULL,
158 "kpreempt", "immediate");
159
160 sched_pstats(NULL);
161 }
162
163 /*
164 * OBSOLETE INTERFACE
165 *
166 * General sleep call. Suspends the current process until a wakeup is
167 * performed on the specified identifier. The process will then be made
168  * runnable with the specified priority.  Sleeps at most timo/hz seconds (0
169  * means no timeout).  If pri includes the PCATCH flag, signals are checked
170  * before and after sleeping; otherwise signals are not checked.  Returns 0
171  * if awakened, or EWOULDBLOCK if the timeout expires.  If PCATCH is set and
172  * a signal needs to be delivered, ERESTART is returned if the current
173  * system call should be restarted if possible, and EINTR is returned if the
174  * system call should be interrupted by the signal.
175 *
176  * The interlock is held until we are on a sleep queue.  The interlock will
177  * be locked before returning to the caller unless the PNORELOCK flag
178 * is specified, in which case the interlock will always be unlocked upon
179 * return.
180 */
181 int
182 ltsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
183 volatile struct simplelock *interlock)
184 {
185 struct lwp *l = curlwp;
186 sleepq_t *sq;
187 kmutex_t *mp;
188 int error;
189
190 KASSERT((l->l_pflag & LP_INTR) == 0);
191
192 if (sleepq_dontsleep(l)) {
193 (void)sleepq_abort(NULL, 0);
194 if ((priority & PNORELOCK) != 0)
195 simple_unlock(interlock);
196 return 0;
197 }
198
199 l->l_kpriority = true;
200 sq = sleeptab_lookup(&sleeptab, ident, &mp);
201 sleepq_enter(sq, l, mp);
202 sleepq_enqueue(sq, ident, wmesg, &sleep_syncobj);
203
204 if (interlock != NULL) {
205 KASSERT(simple_lock_held(interlock));
206 simple_unlock(interlock);
207 }
208
209 error = sleepq_block(timo, priority & PCATCH);
210
211 if (interlock != NULL && (priority & PNORELOCK) == 0)
212 simple_lock(interlock);
213
214 return error;
215 }
216
217 int
218 mtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
219 kmutex_t *mtx)
220 {
221 struct lwp *l = curlwp;
222 sleepq_t *sq;
223 kmutex_t *mp;
224 int error;
225
226 KASSERT((l->l_pflag & LP_INTR) == 0);
227
228 if (sleepq_dontsleep(l)) {
229 (void)sleepq_abort(mtx, (priority & PNORELOCK) != 0);
230 return 0;
231 }
232
233 l->l_kpriority = true;
234 sq = sleeptab_lookup(&sleeptab, ident, &mp);
235 sleepq_enter(sq, l, mp);
236 sleepq_enqueue(sq, ident, wmesg, &sleep_syncobj);
237 mutex_exit(mtx);
238 error = sleepq_block(timo, priority & PCATCH);
239
240 if ((priority & PNORELOCK) == 0)
241 mutex_enter(mtx);
242
243 return error;
244 }
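
/*
 * Illustrative usage sketch, assuming a hypothetical driver softc "sc"
 * with a kmutex_t "sc_lock" and a completion flag "sc_done"; the names
 * are for illustration only.  A caller of the obsolete mtsleep()
 * interface above typically sleeps in a loop, holding the mutex around
 * the condition check:
 *
 *	mutex_enter(&sc->sc_lock);
 *	while (!sc->sc_done && error == 0)
 *		error = mtsleep(&sc->sc_done, PCATCH, "scwait", hz,
 *		    &sc->sc_lock);
 *	mutex_exit(&sc->sc_lock);
 *
 * with the matching wakeup(&sc->sc_done) issued by whichever side sets
 * sc_done.  New code should use condition variables (condvar(9))
 * instead of this interface.
 */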
245
246 /*
247 * General sleep call for situations where a wake-up is not expected.
248 */
249 int
250 kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
251 {
252 struct lwp *l = curlwp;
253 kmutex_t *mp;
254 sleepq_t *sq;
255 int error;
256
257 if (sleepq_dontsleep(l))
258 return sleepq_abort(NULL, 0);
259
260 if (mtx != NULL)
261 mutex_exit(mtx);
262 l->l_kpriority = true;
263 sq = sleeptab_lookup(&sleeptab, l, &mp);
264 sleepq_enter(sq, l, mp);
265 sleepq_enqueue(sq, l, wmesg, &sleep_syncobj);
266 error = sleepq_block(timo, intr);
267 if (mtx != NULL)
268 mutex_enter(mtx);
269
270 return error;
271 }
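
/*
 * Illustrative usage sketch (an assumed call site, not taken from this
 * file): because no wakeup is expected, kpause() suits short bounded
 * delays, e.g. pausing for roughly half a second while holding no
 * mutex:
 *
 *	(void)kpause("pause", false, hz / 2, NULL);
 *
 * When a mutex is passed, kpause() drops it across the sleep and
 * reacquires it before returning, as the code above shows.
 */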
272
273 #ifdef KERN_SA
274 /*
275 * sa_awaken:
276 *
277 * We believe this lwp is an SA lwp. If it's yielding,
278 * let it know it needs to wake up.
279 *
280  * We are called with, and exit with, the lwp locked.  We are
281  * called in the middle of wakeup operations, so we must
282  * not touch the locks at all.
283 */
284 void
285 sa_awaken(struct lwp *l)
286 {
287 /* LOCK_ASSERT(lwp_locked(l, NULL)); */
288
289 if (l == l->l_savp->savp_lwp && l->l_flag & LW_SA_YIELD)
290 l->l_flag &= ~LW_SA_IDLE;
291 }
292 #endif /* KERN_SA */
293
294 /*
295 * OBSOLETE INTERFACE
296 *
297 * Make all processes sleeping on the specified identifier runnable.
298 */
299 void
300 wakeup(wchan_t ident)
301 {
302 sleepq_t *sq;
303 kmutex_t *mp;
304
305 if (cold)
306 return;
307
308 sq = sleeptab_lookup(&sleeptab, ident, &mp);
309 sleepq_wake(sq, ident, (u_int)-1, mp);
310 }
311
312 /*
313 * OBSOLETE INTERFACE
314 *
315 * Make the highest priority process first in line on the specified
316 * identifier runnable.
317 */
318 void
319 wakeup_one(wchan_t ident)
320 {
321 sleepq_t *sq;
322 kmutex_t *mp;
323
324 if (cold)
325 return;
326
327 sq = sleeptab_lookup(&sleeptab, ident, &mp);
328 sleepq_wake(sq, ident, 1, mp);
329 }
330
331
332 /*
333 * General yield call. Puts the current process back on its run queue and
334 * performs a voluntary context switch. Should only be called when the
335  * current process explicitly requests it (e.g. sched_yield(2)).
336 */
337 void
338 yield(void)
339 {
340 struct lwp *l = curlwp;
341
342 KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
343 lwp_lock(l);
344 KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock));
345 KASSERT(l->l_stat == LSONPROC);
346 l->l_kpriority = false;
347 (void)mi_switch(l);
348 KERNEL_LOCK(l->l_biglocks, l);
349 }
350
351 /*
352 * General preemption call. Puts the current process back on its run queue
353 * and performs an involuntary context switch.
354 */
355 void
356 preempt(void)
357 {
358 struct lwp *l = curlwp;
359
360 KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
361 lwp_lock(l);
362 KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_lwplock));
363 KASSERT(l->l_stat == LSONPROC);
364 l->l_kpriority = false;
365 l->l_nivcsw++;
366 (void)mi_switch(l);
367 KERNEL_LOCK(l->l_biglocks, l);
368 }
369
370 /*
371 * Handle a request made by another agent to preempt the current LWP
372 * in-kernel. Usually called when l_dopreempt may be non-zero.
373 *
374  * The static chars below are used only for their addresses, as markers for lockstat.
375 */
376 static char in_critical_section;
377 static char kernel_lock_held;
378 static char is_softint;
379 static char cpu_kpreempt_enter_fail;
380
381 bool
382 kpreempt(uintptr_t where)
383 {
384 uintptr_t failed;
385 lwp_t *l;
386 int s, dop;
387
388 l = curlwp;
389 failed = 0;
390 while ((dop = l->l_dopreempt) != 0) {
391 if (l->l_stat != LSONPROC) {
392 /*
393 * About to block (or die), let it happen.
394 * Doesn't really count as "preemption has
395 * been blocked", since we're going to
396 * context switch.
397 */
398 l->l_dopreempt = 0;
399 return true;
400 }
401 if (__predict_false((l->l_flag & LW_IDLE) != 0)) {
402 /* Can't preempt idle loop, don't count as failure. */
403 l->l_dopreempt = 0;
404 return true;
405 }
406 if (__predict_false(l->l_nopreempt != 0)) {
407 /* LWP holds preemption disabled, explicitly. */
408 if ((dop & DOPREEMPT_COUNTED) == 0) {
409 kpreempt_ev_crit.ev_count++;
410 }
411 failed = (uintptr_t)&in_critical_section;
412 break;
413 }
414 if (__predict_false((l->l_pflag & LP_INTR) != 0)) {
415 /* Can't preempt soft interrupts yet. */
416 l->l_dopreempt = 0;
417 failed = (uintptr_t)&is_softint;
418 break;
419 }
420 s = splsched();
421 if (__predict_false(l->l_blcnt != 0 ||
422 curcpu()->ci_biglock_wanted != NULL)) {
423 /* Hold or want kernel_lock, code is not MT safe. */
424 splx(s);
425 if ((dop & DOPREEMPT_COUNTED) == 0) {
426 kpreempt_ev_klock.ev_count++;
427 }
428 failed = (uintptr_t)&kernel_lock_held;
429 break;
430 }
431 if (__predict_false(!cpu_kpreempt_enter(where, s))) {
432 /*
433 * It may be that the IPL is too high.
434  * cpu_kpreempt_enter() can schedule an
435 * interrupt to retry later.
436 */
437 splx(s);
438 failed = (uintptr_t)&cpu_kpreempt_enter_fail;
439 break;
440 }
441 /* Do it! */
442 if (__predict_true((dop & DOPREEMPT_COUNTED) == 0)) {
443 kpreempt_ev_immed.ev_count++;
444 }
445 lwp_lock(l);
446 mi_switch(l);
447 l->l_nopreempt++;
448 splx(s);
449
450 /* Take care of any MD cleanup. */
451 cpu_kpreempt_exit(where);
452 l->l_nopreempt--;
453 }
454
455 /* Record preemption failure for reporting via lockstat. */
456 if (__predict_false(failed)) {
457 int lsflag = 0;
458 atomic_or_uint(&l->l_dopreempt, DOPREEMPT_COUNTED);
459 LOCKSTAT_ENTER(lsflag);
460 /* Might recurse, make it atomic. */
461 if (__predict_false(lsflag)) {
462 if (where == 0) {
463 where = (uintptr_t)__builtin_return_address(0);
464 }
465 if (atomic_cas_ptr_ni((void *)&l->l_pfailaddr,
466 NULL, (void *)where) == NULL) {
467 LOCKSTAT_START_TIMER(lsflag, l->l_pfailtime);
468 l->l_pfaillock = failed;
469 }
470 }
471 LOCKSTAT_EXIT(lsflag);
472 }
473
474 return failed;
475 }
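
/*
 * Illustrative call-site sketch (assumed machine-dependent code, not
 * quoted from any particular port): as the comment above notes,
 * kpreempt() is usually entered when l_dopreempt may be non-zero,
 * typically from an interrupt-return or preemption-IPI path:
 *
 *	if (curlwp->l_dopreempt != 0)
 *		kpreempt(0);
 *
 * Passing where == 0 makes kpreempt() substitute its own return address
 * when recording a preemption failure for lockstat, as the code above
 * shows.
 */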
476
477 /*
478 * Return true if preemption is explicitly disabled.
479 */
480 bool
481 kpreempt_disabled(void)
482 {
483 lwp_t *l;
484
485 l = curlwp;
486
487 return l->l_nopreempt != 0 || l->l_stat == LSZOMB ||
488 (l->l_flag & LW_IDLE) != 0 || cpu_kpreempt_disabled();
489 }
490
491 /*
492 * Disable kernel preemption.
493 */
494 void
495 kpreempt_disable(void)
496 {
497
498 KPREEMPT_DISABLE(curlwp);
499 }
500
501 /*
502 * Reenable kernel preemption.
503 */
504 void
505 kpreempt_enable(void)
506 {
507
508 KPREEMPT_ENABLE(curlwp);
509 }
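
/*
 * Illustrative pairing sketch (an assumed consumer, not code from this
 * file): a caller that must not migrate to another CPU, for instance
 * while working with curcpu(), brackets the region with the helpers
 * above and may assert the state with kpreempt_disabled():
 *
 *	kpreempt_disable();
 *	ci = curcpu();
 *	...per-CPU work that must stay on this CPU...
 *	KASSERT(kpreempt_disabled());
 *	kpreempt_enable();
 */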
510
511 /*
512 * Compute the amount of time during which the current lwp was running.
513 *
514 * - update l_rtime unless it's an idle lwp.
515 */
516
517 void
518 updatertime(lwp_t *l, const struct bintime *now)
519 {
520
521 if ((l->l_flag & LW_IDLE) != 0)
522 return;
523
524 /* rtime += now - stime */
525 bintime_add(&l->l_rtime, now);
526 bintime_sub(&l->l_rtime, &l->l_stime);
527 }
528
529 /*
530  * Select the next LWP to run from the current CPU's run queue.
531 */
532 static inline lwp_t *
533 nextlwp(struct cpu_info *ci, struct schedstate_percpu *spc)
534 {
535 lwp_t *newl;
536
537 /*
538  * Let sched_nextlwp() select the LWP to run on the CPU next.
539  * If no LWP is runnable, select the idle LWP.
540  *
541  * Note that spc_lwplock might not necessarily be held, and
542  * the new thread will be unlocked after setting the LWP lock.
543 */
544 newl = sched_nextlwp();
545 if (newl != NULL) {
546 sched_dequeue(newl);
547 KASSERT(lwp_locked(newl, spc->spc_mutex));
548 newl->l_stat = LSONPROC;
549 newl->l_cpu = ci;
550 newl->l_pflag |= LP_RUNNING;
551 lwp_setlock(newl, spc->spc_lwplock);
552 } else {
553 newl = ci->ci_data.cpu_idlelwp;
554 newl->l_stat = LSONPROC;
555 newl->l_pflag |= LP_RUNNING;
556 }
557
558 /*
559 * Only clear want_resched if there are no pending (slow)
560 * software interrupts.
561 */
562 ci->ci_want_resched = ci->ci_data.cpu_softints;
563 spc->spc_flags &= ~SPCF_SWITCHCLEAR;
564 spc->spc_curpriority = lwp_eprio(newl);
565
566 return newl;
567 }
568
569 /*
570 * The machine independent parts of context switch.
571 *
572 * Returns 1 if another LWP was actually run.
573 */
574 int
575 mi_switch(lwp_t *l)
576 {
577 struct cpu_info *ci;
578 struct schedstate_percpu *spc;
579 struct lwp *newl;
580 int retval, oldspl;
581 struct bintime bt;
582 bool returning;
583
584 KASSERT(lwp_locked(l, NULL));
585 KASSERT(kpreempt_disabled());
586 LOCKDEBUG_BARRIER(l->l_mutex, 1);
587
588 #ifdef KSTACK_CHECK_MAGIC
589 kstack_check_magic(l);
590 #endif
591
592 binuptime(&bt);
593
594 KASSERT(l->l_cpu == curcpu());
595 ci = l->l_cpu;
596 spc = &ci->ci_schedstate;
597 returning = false;
598 newl = NULL;
599
600 /*
601 * If we have been asked to switch to a specific LWP, then there
602 * is no need to inspect the run queues. If a soft interrupt is
603 * blocking, then return to the interrupted thread without adjusting
604  * VM context or its start time: neither has been changed in order
605 * to take the interrupt.
606 */
607 if (l->l_switchto != NULL) {
608 if ((l->l_pflag & LP_INTR) != 0) {
609 returning = true;
610 softint_block(l);
611 if ((l->l_pflag & LP_TIMEINTR) != 0)
612 updatertime(l, &bt);
613 }
614 newl = l->l_switchto;
615 l->l_switchto = NULL;
616 }
617 #ifndef __HAVE_FAST_SOFTINTS
618 else if (ci->ci_data.cpu_softints != 0) {
619 /* There are pending soft interrupts, so pick one. */
620 newl = softint_picklwp();
621 newl->l_stat = LSONPROC;
622 newl->l_pflag |= LP_RUNNING;
623 }
624 #endif /* !__HAVE_FAST_SOFTINTS */
625
626 /* Count time spent in current system call */
627 if (!returning) {
628 SYSCALL_TIME_SLEEP(l);
629
630 /*
631 * XXXSMP If we are using h/w performance counters,
632 * save context.
633 */
634 #if PERFCTRS
635 if (PMC_ENABLED(l->l_proc)) {
636 pmc_save_context(l->l_proc);
637 }
638 #endif
639 updatertime(l, &bt);
640 }
641
642 /* Lock the runqueue */
643 KASSERT(l->l_stat != LSRUN);
644 mutex_spin_enter(spc->spc_mutex);
645
646 /*
647 * If on the CPU and we have gotten this far, then we must yield.
648 */
649 if (l->l_stat == LSONPROC && l != newl) {
650 KASSERT(lwp_locked(l, spc->spc_lwplock));
651 if ((l->l_flag & LW_IDLE) == 0) {
652 l->l_stat = LSRUN;
653 lwp_setlock(l, spc->spc_mutex);
654 sched_enqueue(l, true);
655 /* Handle migration case */
656 KASSERT(spc->spc_migrating == NULL);
657 if (l->l_target_cpu != NULL) {
658 spc->spc_migrating = l;
659 }
660 } else
661 l->l_stat = LSIDL;
662 }
663
664 /* Pick new LWP to run. */
665 if (newl == NULL) {
666 newl = nextlwp(ci, spc);
667 }
668
669 /* Items that must be updated with the CPU locked. */
670 if (!returning) {
671 /* Update the new LWP's start time. */
672 newl->l_stime = bt;
673
674 /*
675 * ci_curlwp changes when a fast soft interrupt occurs.
676 * We use cpu_onproc to keep track of which kernel or
677 * user thread is running 'underneath' the software
678 * interrupt. This is important for time accounting,
679 * itimers and forcing user threads to preempt (aston).
680 */
681 ci->ci_data.cpu_onproc = newl;
682 }
683
684 /*
685 * Preemption related tasks. Must be done with the current
686 * CPU locked.
687 */
688 cpu_did_resched(l);
689 l->l_dopreempt = 0;
690 if (__predict_false(l->l_pfailaddr != 0)) {
691 LOCKSTAT_FLAG(lsflag);
692 LOCKSTAT_ENTER(lsflag);
693 LOCKSTAT_STOP_TIMER(lsflag, l->l_pfailtime);
694 LOCKSTAT_EVENT_RA(lsflag, l->l_pfaillock, LB_NOPREEMPT|LB_SPIN,
695 1, l->l_pfailtime, l->l_pfailaddr);
696 LOCKSTAT_EXIT(lsflag);
697 l->l_pfailtime = 0;
698 l->l_pfaillock = 0;
699 l->l_pfailaddr = 0;
700 }
701
702 if (l != newl) {
703 struct lwp *prevlwp;
704
705 /* Release all locks, but leave the current LWP locked */
706 if (l->l_mutex == spc->spc_mutex) {
707 /*
708 * Drop spc_lwplock, if the current LWP has been moved
709 * to the run queue (it is now locked by spc_mutex).
710 */
711 mutex_spin_exit(spc->spc_lwplock);
712 } else {
713 /*
714 * Otherwise, drop the spc_mutex, we are done with the
715 * run queues.
716 */
717 mutex_spin_exit(spc->spc_mutex);
718 }
719
720 /*
721 * Mark that context switch is going to be performed
722 * for this LWP, to protect it from being switched
723 * to on another CPU.
724 */
725 KASSERT(l->l_ctxswtch == 0);
726 l->l_ctxswtch = 1;
727 l->l_ncsw++;
728 l->l_pflag &= ~LP_RUNNING;
729
730 /*
731 * Increase the count of spin-mutexes before the release
732 * of the last lock - we must remain at IPL_SCHED during
733 * the context switch.
734 */
735 oldspl = MUTEX_SPIN_OLDSPL(ci);
736 ci->ci_mtx_count--;
737 lwp_unlock(l);
738
739 /* Count the context switch on this CPU. */
740 ci->ci_data.cpu_nswtch++;
741
742 /* Update status for lwpctl, if present. */
743 if (l->l_lwpctl != NULL)
744 l->l_lwpctl->lc_curcpu = LWPCTL_CPU_NONE;
745
746 /*
747 * Save old VM context, unless a soft interrupt
748 * handler is blocking.
749 */
750 if (!returning)
751 pmap_deactivate(l);
752
753 /*
754  * We may need to spin-wait if 'newl' is still
755 * context switching on another CPU.
756 */
757 if (newl->l_ctxswtch != 0) {
758 u_int count;
759 count = SPINLOCK_BACKOFF_MIN;
760 while (newl->l_ctxswtch)
761 SPINLOCK_BACKOFF(count);
762 }
763
764  /* Switch to the new LWP. */
765 prevlwp = cpu_switchto(l, newl, returning);
766 ci = curcpu();
767
768 /*
769 * Switched away - we have new curlwp.
770 * Restore VM context and IPL.
771 */
772 pmap_activate(l);
773 if (prevlwp != NULL) {
774 /* Normalize the count of the spin-mutexes */
775 ci->ci_mtx_count++;
776 /* Unmark the state of context switch */
777 membar_exit();
778 prevlwp->l_ctxswtch = 0;
779 }
780
781 /* Update status for lwpctl, if present. */
782 if (l->l_lwpctl != NULL) {
783 l->l_lwpctl->lc_curcpu = (int)cpu_index(ci);
784 l->l_lwpctl->lc_pctr++;
785 }
786
787 KASSERT(l->l_cpu == ci);
788 splx(oldspl);
789 retval = 1;
790 } else {
791 /* Nothing to do - just unlock and return. */
792 mutex_spin_exit(spc->spc_mutex);
793 lwp_unlock(l);
794 retval = 0;
795 }
796
797 KASSERT(l == curlwp);
798 KASSERT(l->l_stat == LSONPROC);
799
800 /*
801 * XXXSMP If we are using h/w performance counters, restore context.
802 * XXXSMP preemption problem.
803 */
804 #if PERFCTRS
805 if (PMC_ENABLED(l->l_proc)) {
806 pmc_restore_context(l->l_proc);
807 }
808 #endif
809 SYSCALL_TIME_WAKEUP(l);
810 LOCKDEBUG_BARRIER(NULL, 1);
811
812 return retval;
813 }
814
815 /*
816 * The machine independent parts of context switch to oblivion.
817 * Does not return. Call with the LWP unlocked.
818 */
819 void
820 lwp_exit_switchaway(lwp_t *l)
821 {
822 struct cpu_info *ci;
823 struct lwp *newl;
824 struct bintime bt;
825
826 ci = l->l_cpu;
827
828 KASSERT(kpreempt_disabled());
829 KASSERT(l->l_stat == LSZOMB || l->l_stat == LSIDL);
830 KASSERT(ci == curcpu());
831 LOCKDEBUG_BARRIER(NULL, 0);
832
833 #ifdef KSTACK_CHECK_MAGIC
834 kstack_check_magic(l);
835 #endif
836
837 /* Count time spent in current system call */
838 SYSCALL_TIME_SLEEP(l);
839 binuptime(&bt);
840 updatertime(l, &bt);
841
842 /* Must stay at IPL_SCHED even after releasing run queue lock. */
843 (void)splsched();
844
845 /*
846  * Let sched_nextlwp() select the LWP to run on the CPU next.
847  * If no LWP is runnable, select the idle LWP.
848  *
849  * Note that spc_lwplock might not necessarily be held, and
850  * the new thread will be unlocked after setting the LWP lock.
851 */
852 spc_lock(ci);
853 #ifndef __HAVE_FAST_SOFTINTS
854 if (ci->ci_data.cpu_softints != 0) {
855 /* There are pending soft interrupts, so pick one. */
856 newl = softint_picklwp();
857 newl->l_stat = LSONPROC;
858 newl->l_pflag |= LP_RUNNING;
859 } else
860 #endif /* !__HAVE_FAST_SOFTINTS */
861 {
862 newl = nextlwp(ci, &ci->ci_schedstate);
863 }
864
865 /* Update the new LWP's start time. */
866 newl->l_stime = bt;
867 l->l_pflag &= ~LP_RUNNING;
868
869 /*
870 * ci_curlwp changes when a fast soft interrupt occurs.
871 * We use cpu_onproc to keep track of which kernel or
872 * user thread is running 'underneath' the software
873 * interrupt. This is important for time accounting,
874 * itimers and forcing user threads to preempt (aston).
875 */
876 ci->ci_data.cpu_onproc = newl;
877
878 /*
879 * Preemption related tasks. Must be done with the current
880 * CPU locked.
881 */
882 cpu_did_resched(l);
883
884 /* Unlock the run queue. */
885 spc_unlock(ci);
886
887 /* Count the context switch on this CPU. */
888 ci->ci_data.cpu_nswtch++;
889
890 /* Update status for lwpctl, if present. */
891 if (l->l_lwpctl != NULL)
892 l->l_lwpctl->lc_curcpu = LWPCTL_CPU_EXITED;
893
894 /*
895  * We may need to spin-wait if 'newl' is still
896 * context switching on another CPU.
897 */
898 if (newl->l_ctxswtch != 0) {
899 u_int count;
900 count = SPINLOCK_BACKOFF_MIN;
901 while (newl->l_ctxswtch)
902 SPINLOCK_BACKOFF(count);
903 }
904
905  /* Switch to the new LWP. */
906 (void)cpu_switchto(NULL, newl, false);
907
908 for (;;) continue; /* XXX: convince gcc about "noreturn" */
909 /* NOTREACHED */
910 }
911
912 /*
913 * Change process state to be runnable, placing it on the run queue if it is
914 * in memory, and awakening the swapper if it isn't in memory.
915 *
916 * Call with the process and LWP locked. Will return with the LWP unlocked.
917 */
918 void
919 setrunnable(struct lwp *l)
920 {
921 struct proc *p = l->l_proc;
922 struct cpu_info *ci;
923
924 KASSERT((l->l_flag & LW_IDLE) == 0);
925 KASSERT(mutex_owned(p->p_lock));
926 KASSERT(lwp_locked(l, NULL));
927 KASSERT(l->l_mutex != l->l_cpu->ci_schedstate.spc_mutex);
928
929 switch (l->l_stat) {
930 case LSSTOP:
931 /*
932 * If we're being traced (possibly because someone attached us
933 * while we were stopped), check for a signal from the debugger.
934 */
935 if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xstat != 0)
936 signotify(l);
937 p->p_nrlwps++;
938 break;
939 case LSSUSPENDED:
940 l->l_flag &= ~LW_WSUSPEND;
941 p->p_nrlwps++;
942 cv_broadcast(&p->p_lwpcv);
943 break;
944 case LSSLEEP:
945 KASSERT(l->l_wchan != NULL);
946 break;
947 default:
948 panic("setrunnable: lwp %p state was %d", l, l->l_stat);
949 }
950
951 #ifdef KERN_SA
952 if (l->l_proc->p_sa)
953 sa_awaken(l);
954 #endif /* KERN_SA */
955
956 /*
957  * If the LWP was sleeping interruptibly, then it's OK to start it
958 * again. If not, mark it as still sleeping.
959 */
960 if (l->l_wchan != NULL) {
961 l->l_stat = LSSLEEP;
962 /* lwp_unsleep() will release the lock. */
963 lwp_unsleep(l, true);
964 return;
965 }
966
967 /*
968 * If the LWP is still on the CPU, mark it as LSONPROC. It may be
969 * about to call mi_switch(), in which case it will yield.
970 */
971 if ((l->l_pflag & LP_RUNNING) != 0) {
972 l->l_stat = LSONPROC;
973 l->l_slptime = 0;
974 lwp_unlock(l);
975 return;
976 }
977
978 /*
979 * Look for a CPU to run.
980 * Set the LWP runnable.
981 */
982 ci = sched_takecpu(l);
983 l->l_cpu = ci;
984 spc_lock(ci);
985 lwp_unlock_to(l, ci->ci_schedstate.spc_mutex);
986 sched_setrunnable(l);
987 l->l_stat = LSRUN;
988 l->l_slptime = 0;
989
990 /*
991 * If thread is swapped out - wake the swapper to bring it back in.
992 * Otherwise, enter it into a run queue.
993 */
994 if (l->l_flag & LW_INMEM) {
995 sched_enqueue(l, false);
996 resched_cpu(l);
997 lwp_unlock(l);
998 } else {
999 lwp_unlock(l);
1000 uvm_kick_scheduler();
1001 }
1002 }
1003
1004 /*
1005 * suspendsched:
1006 *
1007  * Convert all non-PK_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED.
1008 */
1009 void
1010 suspendsched(void)
1011 {
1012 CPU_INFO_ITERATOR cii;
1013 struct cpu_info *ci;
1014 struct lwp *l;
1015 struct proc *p;
1016
1017 /*
1018 * We do this by process in order not to violate the locking rules.
1019 */
1020 mutex_enter(proc_lock);
1021 PROCLIST_FOREACH(p, &allproc) {
1022 if ((p->p_flag & PK_MARKER) != 0)
1023 continue;
1024
1025 mutex_enter(p->p_lock);
1026 if ((p->p_flag & PK_SYSTEM) != 0) {
1027 mutex_exit(p->p_lock);
1028 continue;
1029 }
1030
1031 p->p_stat = SSTOP;
1032
1033 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1034 if (l == curlwp)
1035 continue;
1036
1037 lwp_lock(l);
1038
1039 /*
1040  * Set LW_WREBOOT so that the LWP will suspend itself
1041  * when it tries to return to user mode.  We want to
1042  * try to get as many LWPs as possible to
1043 * the user / kernel boundary, so that they will
1044 * release any locks that they hold.
1045 */
1046 l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);
1047
1048 if (l->l_stat == LSSLEEP &&
1049 (l->l_flag & LW_SINTR) != 0) {
1050 /* setrunnable() will release the lock. */
1051 setrunnable(l);
1052 continue;
1053 }
1054
1055 lwp_unlock(l);
1056 }
1057
1058 mutex_exit(p->p_lock);
1059 }
1060 mutex_exit(proc_lock);
1061
1062 /*
1063 * Kick all CPUs to make them preempt any LWPs running in user mode.
1064 * They'll trap into the kernel and suspend themselves in userret().
1065 */
1066 for (CPU_INFO_FOREACH(cii, ci)) {
1067 spc_lock(ci);
1068 cpu_need_resched(ci, RESCHED_IMMED);
1069 spc_unlock(ci);
1070 }
1071 }
1072
1073 /*
1074 * sched_unsleep:
1075 *
1076  * This is called when the LWP has not been awoken normally but instead
1077 * interrupted: for example, if the sleep timed out. Because of this,
1078 * it's not a valid action for running or idle LWPs.
1079 */
1080 static u_int
1081 sched_unsleep(struct lwp *l, bool cleanup)
1082 {
1083
1084 lwp_unlock(l);
1085 panic("sched_unsleep");
1086 }
1087
1088 static void
1089 resched_cpu(struct lwp *l)
1090 {
1091 	struct cpu_info *ci = l->l_cpu;
1092
1093 KASSERT(lwp_locked(l, NULL));
1094 if (lwp_eprio(l) > ci->ci_schedstate.spc_curpriority)
1095 cpu_need_resched(ci, 0);
1096 }
1097
1098 static void
1099 sched_changepri(struct lwp *l, pri_t pri)
1100 {
1101
1102 KASSERT(lwp_locked(l, NULL));
1103
1104 if (l->l_stat == LSRUN && (l->l_flag & LW_INMEM) != 0) {
1105 KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
1106 sched_dequeue(l);
1107 l->l_priority = pri;
1108 sched_enqueue(l, false);
1109 } else {
1110 l->l_priority = pri;
1111 }
1112 resched_cpu(l);
1113 }
1114
1115 static void
1116 sched_lendpri(struct lwp *l, pri_t pri)
1117 {
1118
1119 KASSERT(lwp_locked(l, NULL));
1120
1121 if (l->l_stat == LSRUN && (l->l_flag & LW_INMEM) != 0) {
1122 KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
1123 sched_dequeue(l);
1124 l->l_inheritedprio = pri;
1125 sched_enqueue(l, false);
1126 } else {
1127 l->l_inheritedprio = pri;
1128 }
1129 resched_cpu(l);
1130 }
1131
1132 struct lwp *
1133 syncobj_noowner(wchan_t wchan)
1134 {
1135
1136 return NULL;
1137 }
1138
1139 /* Decay 95% of proc::p_pctcpu in 60 seconds, ccpu = exp(-1/20) */
1140 const fixpt_t ccpu = 0.95122942450071400909 * FSCALE;
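
/*
 * Worked check of the constant above: sched_pstats() below is scheduled
 * once per second, so each second p_pctcpu is multiplied by
 * ccpu = exp(-1/20) ~= 0.951229.  After 60 seconds the old contribution
 * has been scaled by exp(-60/20) = exp(-3) ~= 0.0498, i.e. roughly 95%
 * of it has decayed away, matching the comment above.
 */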
1141
1142 /*
1143 * sched_pstats:
1144 *
1145 * Update process statistics and check CPU resource allocation.
1146 * Call scheduler-specific hook to eventually adjust process/LWP
1147 * priorities.
1148 */
1149 /* ARGSUSED */
1150 void
1151 sched_pstats(void *arg)
1152 {
1153 const int clkhz = (stathz != 0 ? stathz : hz);
1154 static bool backwards;
1155 struct rlimit *rlim;
1156 struct lwp *l;
1157 struct proc *p;
1158 long runtm;
1159 fixpt_t lpctcpu;
1160 u_int lcpticks;
1161 int sig;
1162
1163 sched_pstats_ticks++;
1164
1165 mutex_enter(proc_lock);
1166 PROCLIST_FOREACH(p, &allproc) {
1167 if (__predict_false((p->p_flag & PK_MARKER) != 0))
1168 continue;
1169
1170 /*
1171 * Increment time in/out of memory and sleep
1172 * time (if sleeping), ignore overflow.
1173 */
1174 mutex_enter(p->p_lock);
1175 runtm = p->p_rtime.sec;
1176 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1177 if (__predict_false((l->l_flag & LW_IDLE) != 0))
1178 continue;
1179 lwp_lock(l);
1180 runtm += l->l_rtime.sec;
1181 l->l_swtime++;
1182 sched_lwp_stats(l);
1183 lwp_unlock(l);
1184
1185 l->l_pctcpu = (l->l_pctcpu * ccpu) >> FSHIFT;
1186 if (l->l_slptime != 0)
1187 continue;
1188
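			/*
			 * Fold the statclock ticks accumulated since the
			 * last pass into the fixed-point CPU percentage:
			 * lcpticks / clkhz is the fraction of the last
			 * second spent running, scaled by FSCALE and
			 * weighted by (FSCALE - ccpu) so that a fully busy
			 * LWP converges towards FSCALE.
			 */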
1189 lpctcpu = l->l_pctcpu;
1190 lcpticks = atomic_swap_uint(&l->l_cpticks, 0);
1191 lpctcpu += ((FSCALE - ccpu) *
1192 (lcpticks * FSCALE / clkhz)) >> FSHIFT;
1193 l->l_pctcpu = lpctcpu;
1194 }
1195 /* Calculating p_pctcpu only for ps(1) */
1196 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
1197
1198 /*
1199 * Check if the process exceeds its CPU resource allocation.
1200 * If over max, kill it.
1201 */
1202 rlim = &p->p_rlimit[RLIMIT_CPU];
1203 sig = 0;
1204 if (__predict_false(runtm >= rlim->rlim_cur)) {
1205 if (runtm >= rlim->rlim_max)
1206 sig = SIGKILL;
1207 else {
1208 sig = SIGXCPU;
1209 if (rlim->rlim_cur < rlim->rlim_max)
1210 rlim->rlim_cur += 5;
1211 }
1212 }
1213 mutex_exit(p->p_lock);
1214 if (__predict_false(runtm < 0)) {
1215 if (!backwards) {
1216 backwards = true;
1217 printf("WARNING: negative runtime; "
1218 "monotonic clock has gone backwards\n");
1219 }
1220 } else if (__predict_false(sig)) {
1221 KASSERT((p->p_flag & PK_SYSTEM) == 0);
1222 psignal(p, sig);
1223 }
1224 }
1225 mutex_exit(proc_lock);
1226 uvm_meter();
1227 cv_wakeup(&lbolt);
1228 callout_schedule(&sched_pstats_ch, hz);
1229 }