The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_sleepqueue.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 /*
   29  * Implementation of sleep queues used to hold queue of threads blocked on
   30  * a wait channel.  Sleep queues are different from turnstiles in that wait
   31  * channels are not owned by anyone, so there is no priority propagation.
   32  * Sleep queues can also provide a timeout and can also be interrupted by
   33  * signals.  That said, there are several similarities between the turnstile
   34  * and sleep queue implementations.  (Note: turnstiles were implemented
   35  * first.)  For example, both use a hash table of the same size where each
   36  * bucket is referred to as a "chain" that contains both a spin lock and
   37  * a linked list of queues.  An individual queue is located by using a hash
   38  * to pick a chain, locking the chain, and then walking the chain searching
   39  * for the queue.  This means that a wait channel object does not need to
   40  * embed its queue head just as locks do not embed their turnstile queue
   41  * head.  Threads also carry around a sleep queue that they lend to the
   42  * wait channel when blocking.  Just as in turnstiles, the queue includes
   43  * a free list of the sleep queues of other threads blocked on the same
   44  * wait channel in the case of multiple waiters.
   45  *
   46  * Additional functionality provided by sleep queues includes the
   47  * ability to set a timeout.  The timeout is managed using a per-thread
   48  * callout that resumes a thread if it is asleep.  A thread may also
   49  * catch signals while it is asleep (aka an interruptible sleep).  The
   50  * signal code uses sleepq_abort() to interrupt a sleeping thread.  Finally,
   51  * sleep queues also provide some extra assertions.  One is not allowed to
   52  * mix the sleep/wakeup and cv APIs for a given wait channel.  Also, one
   53  * must consistently use the same lock to synchronize with a wait channel,
   54  * though this check is currently only a warning for sleep/wakeup due to
   55  * pre-existing abuse of that API.  The same lock must also be held when
   56  * awakening threads, though that is currently only enforced for condition
   57  * variables.
   58  */
   59 
   60 #include <sys/cdefs.h>
   61 __FBSDID("$FreeBSD$");
   62 
   63 #include "opt_sleepqueue_profiling.h"
   64 #include "opt_ddb.h"
   65 #include "opt_sched.h"
   66 #include "opt_stack.h"
   67 
   68 #include <sys/param.h>
   69 #include <sys/systm.h>
   70 #include <sys/lock.h>
   71 #include <sys/kernel.h>
   72 #include <sys/ktr.h>
   73 #include <sys/mutex.h>
   74 #include <sys/proc.h>
   75 #include <sys/sbuf.h>
   76 #include <sys/sched.h>
   77 #include <sys/sdt.h>
   78 #include <sys/signalvar.h>
   79 #include <sys/sleepqueue.h>
   80 #include <sys/stack.h>
   81 #include <sys/sysctl.h>
   82 #include <sys/time.h>
   83 #ifdef EPOCH_TRACE
   84 #include <sys/epoch.h>
   85 #endif
   86 
   87 #include <machine/atomic.h>
   88 
   89 #include <vm/uma.h>
   90 
   91 #ifdef DDB
   92 #include <ddb/ddb.h>
   93 #endif
   94 
   95 /*
   96  * Constants for the hash table of sleep queue chains.
   97  * SC_TABLESIZE must be a power of two for SC_MASK to work properly.
       *
       * SC_HASH() XORs the wait channel address shifted right by SC_SHIFT
       * with the address itself and masks the result down to a table index.
       * SC_LOOKUP() yields a pointer to the chain for a wait channel; its
       * expansion is fully parenthesized so that it composes safely with
       * postfix operators such as '->'.  NR_SLEEPQS is the number of
       * sub-queues per sleep queue (the size of sq_blocked[] and
       * sq_blockedcnt[]).
   98  */
   99 #ifndef SC_TABLESIZE
  100 #define SC_TABLESIZE    256
  101 #endif
  102 CTASSERT(powerof2(SC_TABLESIZE));
  103 #define SC_MASK         (SC_TABLESIZE - 1)
  104 #define SC_SHIFT        8
  105 #define SC_HASH(wc)     ((((uintptr_t)(wc) >> SC_SHIFT) ^ (uintptr_t)(wc)) & \
  106                             SC_MASK)
  107 #define SC_LOOKUP(wc)   (&sleepq_chains[SC_HASH(wc)])
  108 #define NR_SLEEPQS      2
  109 /*
  110  * There are two different lists of sleep queues.  Both lists are connected
  111  * via the sq_hash entries.  The first list is the sleep queue chain list
  112  * that a sleep queue is on when it is attached to a wait channel.  The
  113  * second list is the free list hung off of a sleep queue that is attached
  114  * to a wait channel.
  115  *
  116  * Each sleep queue also contains the wait channel it is attached to, the
  117  * list of threads blocked on that wait channel, flags specific to the
  118  * wait channel, and the lock used to synchronize with a wait channel.
  119  * The flags are used to catch mismatches between the various consumers
  120  * of the sleep queue API (e.g. sleep/wakeup and condition variables).
  121  * The lock pointer is only used when invariants are enabled for various
  122  * debugging checks.
  123  *
       * sq_blocked[] and sq_blockedcnt[] each have NR_SLEEPQS entries; the
       * 'queue' argument of sleepq_add() selects the entry a thread joins,
       * and the matching counter is incremented alongside the list insert.
       *
  124  * Locking key:
  125  *  c - sleep queue chain lock
  126  */
  127 struct sleepqueue {
  128         struct threadqueue sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */
  129         u_int sq_blockedcnt[NR_SLEEPQS];        /* (c) N. of blocked threads. */
  130         LIST_ENTRY(sleepqueue) sq_hash;         /* (c) Chain and free list. */
  131         LIST_HEAD(, sleepqueue) sq_free;        /* (c) Free queues. */
  132         const void      *sq_wchan;              /* (c) Wait channel. */
  133         int     sq_type;                        /* (c) Queue type. */
  134 #ifdef INVARIANTS
  135         struct lock_object *sq_lock;            /* (c) Associated lock. */
  136 #endif
  137 };
  138 
      /*
       * One bucket of the sleep queue hash table.  sc_queues lists the sleep
       * queues whose wait channels hash to this bucket, and sc_lock is the
       * mutex (initialized with MTX_SPIN in init_sleepqueues()) that the
       * functions in this file assert is owned before touching them.  The
       * depth counters exist only when SLEEPQUEUE_PROFILING is defined.  The
       * structure is aligned to CACHE_LINE_SIZE.
       */
  139 struct sleepqueue_chain {
  140         LIST_HEAD(, sleepqueue) sc_queues;      /* List of sleep queues. */
  141         struct mtx sc_lock;                     /* Spin lock for this chain. */
  142 #ifdef SLEEPQUEUE_PROFILING
  143         u_int   sc_depth;                       /* Length of sc_queues. */
  144         u_int   sc_max_depth;                   /* Max length of sc_queues. */
  145 #endif
  146 } __aligned(CACHE_LINE_SIZE);
  147 
  148 #ifdef SLEEPQUEUE_PROFILING
      /*
       * Profiling state: the sysctl nodes the statistics hang off, the
       * deepest chain seen so far (sleepq_max_depth, updated in sleepq_add())
       * and the flag that gates the sleepq_profile() call in sleepq_switch().
       */
  149 static SYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  150     "sleepq profiling");
  151 static SYSCTL_NODE(_debug_sleepq, OID_AUTO, chains,
  152     CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  153     "sleepq chain stats");
  154 static u_int sleepq_max_depth;
  155 SYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth,
  156     0, "maximum depth achieved of a single chain");
  157 
  158 static void     sleepq_profile(const char *wmesg);
  159 static int      prof_enabled;
  160 #endif
      /* The hash table of chains and the UMA zone sleep queues come from. */
  161 static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
  162 static uma_zone_t sleepq_zone;
  163 
  164 /*
  165  * Prototypes for non-exported routines.
       * sleepq_dtor() is only declared when INVARIANTS is defined; it is the
       * destructor passed to uma_zcreate() in init_sleepqueues() in that case.
  166  */
  167 static int      sleepq_catch_signals(const void *wchan, int pri);
  168 static inline int sleepq_check_signals(void);
  169 static inline int sleepq_check_timeout(void);
  170 #ifdef INVARIANTS
  171 static void     sleepq_dtor(void *mem, int size, void *arg);
  172 #endif
  173 static int      sleepq_init(void *mem, int size, int flags);
  174 static int      sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
  175                     int pri, int srqflags);
  176 static void     sleepq_remove_thread(struct sleepqueue *sq, struct thread *td);
  177 static void     sleepq_switch(const void *wchan, int pri);
  178 static void     sleepq_timeout(void *arg);
  179 
      /*
       * Scheduler tracing probes used below: "sleep" is fired from
       * sleepq_switch() and "wakeup" from sleepq_remove_thread().
       */
  180 SDT_PROBE_DECLARE(sched, , , sleep);
  181 SDT_PROBE_DECLARE(sched, , , wakeup);
  182 
  183 /*
  184  * Export each chain's "depth" and "max_depth" counters under a sysctl node
  185  * named by chain index.  Must run after sleepinit(), so after SI_SUB_KMEM.
  186  */
  187 #ifdef SLEEPQUEUE_PROFILING
  188 static void
  189 init_sleepqueue_profiling(void)
  190 {
  191         struct sleepqueue_chain *sc;
  192         struct sysctl_oid *node;
  193         char label[10];
  194         u_int idx;
  195 
  196         for (idx = 0; idx < SC_TABLESIZE; idx++) {
  197                 sc = &sleepq_chains[idx];
  198                 snprintf(label, sizeof(label), "%u", idx);
  199                 node = SYSCTL_ADD_NODE(NULL,
  200                     SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO,
  201                     label, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
  202                     "sleepq chain stats");
  203                 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(node), OID_AUTO,
  204                     "depth", CTLFLAG_RD, &sc->sc_depth, 0, NULL);
  205                 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(node), OID_AUTO,
  206                     "max_depth", CTLFLAG_RD, &sc->sc_max_depth, 0, NULL);
  207         }
  208 }
  209 
  210 SYSINIT(sleepqueue_profiling, SI_SUB_LOCK, SI_ORDER_ANY,
  211     init_sleepqueue_profiling, NULL);
  212 #endif
  213 
  214 /*
  215  * Early setup, run from the sleepinit() SYSINIT: initialize the queue list
  216  * and spin lock of every chain, create the UMA zone that backs sleep
  217  * queues, and hand thread0 its sleep queue.
  218  */
  219 void
  220 init_sleepqueues(void)
  221 {
  222         struct sleepqueue_chain *sc;
  223 
  224         for (sc = sleepq_chains; sc < &sleepq_chains[SC_TABLESIZE]; sc++) {
  225                 LIST_INIT(&sc->sc_queues);
  226                 mtx_init(&sc->sc_lock, "sleepq chain", NULL, MTX_SPIN);
  227         }
  228         sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
  229 #ifdef INVARIANTS
  230             NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
  231 #else
  232             NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
  233 #endif
  234 
  235         thread0.td_sleepqueue = sleepq_alloc();
  236 }
  237 
  238 /*
  239  * Get a sleep queue for a new thread.
       *
       * The queue is allocated from sleepq_zone with M_WAITOK.
       * NOTE(review): M_WAITOK presumably means the call may sleep and does
       * not return NULL -- confirm against the UMA documentation.
  240  */
  241 struct sleepqueue *
  242 sleepq_alloc(void)
  243 {
  244 
  245         return (uma_zalloc(sleepq_zone, M_WAITOK));
  246 }
  247 
  248 /*
  249  * Free a sleep queue when a thread is destroyed.
       *
       * 'sq' is handed back to sleepq_zone, the zone sleepq_alloc() draws from.
       * NOTE(review): under INVARIANTS the zone is created with sleepq_dtor as
       * its destructor, which presumably checks that the queue is idle --
       * confirm; its body is not visible here.
  250  */
  251 void
  252 sleepq_free(struct sleepqueue *sq)
  253 {
  254 
  255         uma_zfree(sleepq_zone, sq);
  256 }
  257 
  258 /*
  259  * Lock the sleep queue chain associated with the specified wait channel.
       *
       * The chain is chosen by hashing 'wchan' (SC_LOOKUP) and its sc_lock is
       * taken with mtx_lock_spin().  sleepq_release() is the matching unlock.
  260  */
  261 void
  262 sleepq_lock(const void *wchan)
  263 {
  264         struct sleepqueue_chain *sc;
  265 
  266         sc = SC_LOOKUP(wchan);
  267         mtx_lock_spin(&sc->sc_lock);
  268 }
  269 
  270 /*
  271  * Find the sleep queue currently attached to wait channel 'wchan'.  The
  272  * caller must already hold the chain lock for 'wchan' (this is asserted,
  273  * not acquired here).  Returns NULL when no queue is attached.
  274  */
  275 struct sleepqueue *
  276 sleepq_lookup(const void *wchan)
  277 {
  278         struct sleepqueue_chain *chain;
  279         struct sleepqueue *cur;
  280 
  281         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  282         chain = SC_LOOKUP(wchan);
  283         mtx_assert(&chain->sc_lock, MA_OWNED);
  284         LIST_FOREACH(cur, &chain->sc_queues, sq_hash)
  285                 if (cur->sq_wchan == wchan)
  286                         break;
  287         return (cur);   /* NULL once the list has been exhausted. */
  288 }
  289 
  290 /*
  291  * Unlock the sleep queue chain associated with a given wait channel.
       *
       * Releases, with mtx_unlock_spin(), the sc_lock of the chain that
       * 'wchan' hashes to; this is the lock sleepq_lock() acquires.
  292  */
  293 void
  294 sleepq_release(const void *wchan)
  295 {
  296         struct sleepqueue_chain *sc;
  297 
  298         sc = SC_LOOKUP(wchan);
  299         mtx_unlock_spin(&sc->sc_lock);
  300 }
  301 
  302 /*
  303  * Places the current thread on the sleep queue for the specified wait
  304  * channel.  If INVARIANTS is enabled, then it associates the passed in
  305  * lock with the sleepq to make sure it is held when that sleep queue is
  306  * woken up.
       *
       * Arguments:
       *  wchan - wait channel to sleep on; must not be NULL.
       *  lock  - lock associated with the channel (stored or compared only
       *          under INVARIANTS).
       *  wmesg - wait message recorded in td_wmesg.
       *  flags - SLEEPQ_* flags; the SLEEPQ_TYPE bits become (or must match)
       *          sq_type, and SLEEPQ_INTERRUPTIBLE marks the sleep TDF_SINTR.
       *  queue - index of the sub-queue to join, in [0, NR_SLEEPQS).
       *
       * The chain lock for 'wchan' must be held by the caller (asserted).  On
       * return the thread's td_sleepqueue is NULL: it has been lent to the
       * wait channel, either as the channel's queue or on its free list.
  307  */
  308 void
  309 sleepq_add(const void *wchan, struct lock_object *lock, const char *wmesg,
  310     int flags, int queue)
  311 {
  312         struct sleepqueue_chain *sc;
  313         struct sleepqueue *sq;
  314         struct thread *td;
  315 
  316         td = curthread;
  317         sc = SC_LOOKUP(wchan);
  318         mtx_assert(&sc->sc_lock, MA_OWNED);
  319         MPASS(td->td_sleepqueue != NULL);
  320         MPASS(wchan != NULL);
  321         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  322 
  323         /* If this thread is not allowed to sleep, die a horrible death. */
  324         if (__predict_false(!THREAD_CAN_SLEEP())) {
  325 #ifdef EPOCH_TRACE
  326                 epoch_trace_list(curthread);
  327 #endif
  328                 KASSERT(0,
  329                     ("%s: td %p to sleep on wchan %p with sleeping prohibited",
  330                     __func__, td, wchan));
  331         }
  332 
  333         /* Look up the sleep queue associated with the wait channel 'wchan'. */
  334         sq = sleepq_lookup(wchan);
  335 
  336         /*
  337          * If the wait channel does not already have a sleep queue, use
  338          * this thread's sleep queue.  Otherwise, insert the current thread
  339          * into the sleep queue already in use by this wait channel.
  340          */
  341         if (sq == NULL) {
  342 #ifdef INVARIANTS
  343                 int i;
  344 
                      /* The thread's own queue must be completely idle. */
  345                 sq = td->td_sleepqueue;
  346                 for (i = 0; i < NR_SLEEPQS; i++) {
  347                         KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
  348                             ("thread's sleep queue %d is not empty", i));
  349                         KASSERT(sq->sq_blockedcnt[i] == 0,
  350                             ("thread's sleep queue %d count mismatches", i));
  351                 }
  352                 KASSERT(LIST_EMPTY(&sq->sq_free),
  353                     ("thread's sleep queue has a non-empty free list"));
  354                 KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
  355                 sq->sq_lock = lock;
  356 #endif
  357 #ifdef SLEEPQUEUE_PROFILING
                      /* A new queue joins the chain: track per-chain and global depth. */
  358                 sc->sc_depth++;
  359                 if (sc->sc_depth > sc->sc_max_depth) {
  360                         sc->sc_max_depth = sc->sc_depth;
  361                         if (sc->sc_max_depth > sleepq_max_depth)
  362                                 sleepq_max_depth = sc->sc_max_depth;
  363                 }
  364 #endif
                      /* Attach the thread's queue to the chain for this channel. */
  365                 sq = td->td_sleepqueue;
  366                 LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
  367                 sq->sq_wchan = wchan;
  368                 sq->sq_type = flags & SLEEPQ_TYPE;
  369         } else {
                      /*
                       * The channel already has a queue: check that the caller
                       * agrees on channel, lock and type, then park this
                       * thread's spare queue on the free list.
                       */
  370                 MPASS(wchan == sq->sq_wchan);
  371                 MPASS(lock == sq->sq_lock);
  372                 MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
  373                 LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
  374         }
              /* Enqueue the thread and record its sleep state under the thread lock. */
  375         thread_lock(td);
  376         TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
  377         sq->sq_blockedcnt[queue]++;
  378         td->td_sleepqueue = NULL;
  379         td->td_sqqueue = queue;
  380         td->td_wchan = wchan;
  381         td->td_wmesg = wmesg;
  382         if (flags & SLEEPQ_INTERRUPTIBLE) {
  383                 td->td_intrval = 0;
  384                 td->td_flags |= TDF_SINTR;
  385         }
  386         td->td_flags &= ~TDF_TIMEOUT;
  387         thread_unlock(td);
  388 }
  389 
  390 /*
  391  * Sets a timeout that will remove the current thread from the
  392  * specified sleep queue at the specified time if the thread has not
  393  * already been awakened.  Flags are from C_* (callout) namespace.
       *
       * The chain lock for 'wchan' must be held and the current thread must
       * already be on a sleep queue (both asserted).  'sbt' and 'pr' are the
       * timeout and precision passed to callout_when(); the resulting
       * absolute time is stored in td_sleeptimo, which must be zero on entry.
       * The callout is armed on the current CPU with sleepq_timeout() as its
       * handler and C_PRECALC | C_DIRECT_EXEC added to 'flags'.
  394  */
  395 void
  396 sleepq_set_timeout_sbt(const void *wchan, sbintime_t sbt, sbintime_t pr,
  397     int flags)
  398 {
  399         struct sleepqueue_chain *sc __unused;
  400         struct thread *td;
  401         sbintime_t pr1;
  402 
  403         td = curthread;
  404         sc = SC_LOOKUP(wchan);
  405         mtx_assert(&sc->sc_lock, MA_OWNED);
  406         MPASS(TD_ON_SLEEPQ(td));
  407         MPASS(td->td_sleepqueue == NULL);
  408         MPASS(wchan != NULL);
  409         if (cold && td == &thread0)
  410                 panic("timed sleep before timers are working");
  411         KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx",
  412             td->td_tid, td, (uintmax_t)td->td_sleeptimo));
              /* td_sleeptimo is written under the thread lock. */
  413         thread_lock(td);
  414         callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1);
  415         thread_unlock(td);
  416         callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1,
  417             sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC |
  418             C_DIRECT_EXEC);
  419 }
  420 
  421 /*
  422  * Count the threads blocked on sub-queue 'queue' of 'wchan'; 0 if no
  423  * sleep queue is attached to the channel.
  424  */
  425 u_int
  426 sleepq_sleepcnt(const void *wchan, int queue)
  427 {
  428         struct sleepqueue *found;
  429 
  430         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  431         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  432 
  433         found = sleepq_lookup(wchan);
  434         return (found != NULL ? found->sq_blockedcnt[queue] : 0);
  435 }
  436 
      /*
       * Decide whether thread 'td' may go to sleep or must first deal with a
       * pending wakeup request, signal or suspension.
       *
       * Entered with the chain lock 'sc' held (asserted).  Every return path
       * leaves both the chain lock and the thread lock held, although both
       * are dropped temporarily while the process lock is taken.
       *
       * Returns EINTR if TDP_WAKEUP was set (the flag is cleared), 0 if
       * neither a TDA_SIG nor a TDA_SUSPEND AST is pending, and otherwise the
       * result of sig_ast_checksusp() or, if that is 0, sig_ast_needsigchk().
       */
  437 static int
  438 sleepq_check_ast_sc_locked(struct thread *td, struct sleepqueue_chain *sc)
  439 {
  440         struct proc *p;
  441         int ret;
  442 
  443         mtx_assert(&sc->sc_lock, MA_OWNED);
  444 
  445         if ((td->td_pflags & TDP_WAKEUP) != 0) {
  446                 td->td_pflags &= ~TDP_WAKEUP;
  447                 thread_lock(td);
  448                 return (EINTR);
  449         }
  450 
  451         /*
  452          * See if there are any pending signals or suspension requests for this
  453          * thread.  If not, we can switch immediately.
  454          */
  455         thread_lock(td);
  456         if (!td_ast_pending(td, TDA_SIG) && !td_ast_pending(td, TDA_SUSPEND))
  457                 return (0);
  458 
              /* Drop both spin locks before taking the process lock. */
  459         thread_unlock(td);
  460         mtx_unlock_spin(&sc->sc_lock);
  461 
  462         p = td->td_proc;
  463         CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)",
  464             (void *)td, (long)p->p_pid, td->td_name);
  465         PROC_LOCK(p);
  466 
  467         /*
  468          * Check for suspension first. Checking for signals and then
  469          * suspending could result in a missed signal, since a signal
  470          * can be delivered while this thread is suspended.
  471          */
  472         ret = sig_ast_checksusp(td);
  473         if (ret != 0) {
  474                 PROC_UNLOCK(p);
  475                 mtx_lock_spin(&sc->sc_lock);
  476                 thread_lock(td);
  477                 return (ret);
  478         }
  479 
  480         ret = sig_ast_needsigchk(td);
  481 
  482         /*
  483          * Lock the per-process spinlock prior to dropping the
  484          * PROC_LOCK to avoid a signal delivery race.
  485          * PROC_LOCK, PROC_SLOCK, and thread_lock() are
  486          * currently held in tdsendsignal() and thread_single().
  487          */
  488         PROC_SLOCK(p);
  489         mtx_lock_spin(&sc->sc_lock);
  490         PROC_UNLOCK(p);
  491         thread_lock(td);
  492         PROC_SUNLOCK(p);
  493 
  494         return (ret);
  495 }
  496 
  497 /*
  498  * Marks the pending sleep of the current thread as interruptible and
  499  * makes an initial check for pending signals before putting a thread
  500  * to sleep. Enters and exits with the thread lock held.  Thread lock
  501  * may have transitioned from the sleepq lock to a run lock.
       *
       * NOTE(review): as visible here, the function is entered with the chain
       * lock held (asserted) and the thread lock is taken inside
       * sleepq_check_ast_sc_locked().  On the non-zero path both the chain
       * lock and the thread lock are released before returning, which does
       * not match "exits with the thread lock held" above -- confirm and
       * update the sentence.
       *
       * Returns 0 after going through sleepq_switch(), or the non-zero value
       * from sleepq_check_ast_sc_locked(), in which case the thread has been
       * taken off the sleep queue without sleeping.
  502  */
  503 static int
  504 sleepq_catch_signals(const void *wchan, int pri)
  505 {
  506         struct thread *td;
  507         struct sleepqueue_chain *sc;
  508         struct sleepqueue *sq;
  509         int ret;
  510 
  511         sc = SC_LOOKUP(wchan);
  512         mtx_assert(&sc->sc_lock, MA_OWNED);
  513         MPASS(wchan != NULL);
  514         td = curthread;
  515 
  516         ret = sleepq_check_ast_sc_locked(td, sc);
  517         THREAD_LOCK_ASSERT(td, MA_OWNED);
  518         mtx_assert(&sc->sc_lock, MA_OWNED);
  519 
  520         if (ret == 0) {
  521                 /*
  522                  * No pending signals and no suspension requests found.
  523                  * Switch the thread off the cpu.
  524                  */
  525                 sleepq_switch(wchan, pri);
  526         } else {
  527                 /*
  528                  * There were pending signals and this thread is still
  529                  * on the sleep queue, remove it from the sleep queue.
  530                  */
  531                 if (TD_ON_SLEEPQ(td)) {
  532                         sq = sleepq_lookup(wchan);
  533                         sleepq_remove_thread(sq, td);
  534                 }
                      /* The thread lock and chain lock are distinct here; drop both. */
  535                 MPASS(td->td_lock != &sc->sc_lock);
  536                 mtx_unlock_spin(&sc->sc_lock);
  537                 thread_unlock(td);
  538         }
  539         return (ret);
  540 }
  541 
  542 /*
  543  * Switches to another thread if we are still asleep on a sleep queue.
  544  * Returns with thread lock.
       *
       * Entered with both the chain lock for 'wchan' and the thread lock held
       * (asserted).  'pri' is passed through to sched_sleep().
       *
       * NOTE(review): the two early-return paths below release both the chain
       * lock and the thread lock before returning, which does not match
       * "Returns with thread lock" above; the lock state after mi_switch()
       * is not visible from this file section -- confirm and update.
  545  */
  546 static void
  547 sleepq_switch(const void *wchan, int pri)
  548 {
  549         struct sleepqueue_chain *sc;
  550         struct sleepqueue *sq;
  551         struct thread *td;
  552         bool rtc_changed;
  553 
  554         td = curthread;
  555         sc = SC_LOOKUP(wchan);
  556         mtx_assert(&sc->sc_lock, MA_OWNED);
  557         THREAD_LOCK_ASSERT(td, MA_OWNED);
  558 
  559         /*
  560          * If we have a sleep queue, then we've already been woken up, so
  561          * just return.
  562          */
  563         if (td->td_sleepqueue != NULL) {
  564                 mtx_unlock_spin(&sc->sc_lock);
  565                 thread_unlock(td);
  566                 return;
  567         }
  568 
  569         /*
  570          * If TDF_TIMEOUT is set, then our sleep has been timed out
  571          * already but we are still on the sleep queue, so dequeue the
  572          * thread and return.
  573          *
  574          * Do the same if the real-time clock has been adjusted since this
  575          * thread calculated its timeout based on that clock.  This handles
  576          * the following race:
  577          * - The Ts thread needs to sleep until an absolute real-clock time.
  578          *   It copies the global rtc_generation into curthread->td_rtcgen,
  579          *   reads the RTC, and calculates a sleep duration based on that time.
  580          *   See umtxq_sleep() for an example.
  581          * - The Tc thread adjusts the RTC, bumps rtc_generation, and wakes
  582          *   threads that are sleeping until an absolute real-clock time.
  583          *   See tc_setclock() and the POSIX specification of clock_settime().
  584          * - Ts reaches the code below.  It holds the sleepqueue chain lock,
  585          *   so Tc has finished waking, so this thread must test td_rtcgen.
  586          * (The declaration of td_rtcgen refers to this comment.)
  587          */
  588         rtc_changed = td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation;
  589         if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) {
  590                 if (rtc_changed) {
  591                         td->td_rtcgen = 0;
  592                 }
  593                 MPASS(TD_ON_SLEEPQ(td));
  594                 sq = sleepq_lookup(wchan);
  595                 sleepq_remove_thread(sq, td);
  596                 mtx_unlock_spin(&sc->sc_lock);
  597                 thread_unlock(td);
  598                 return;
  599         }
  600 #ifdef SLEEPQUEUE_PROFILING
  601         if (prof_enabled)
  602                 sleepq_profile(td->td_wmesg);
  603 #endif
  604         MPASS(td->td_sleepqueue == NULL);
              /*
               * Really go to sleep.  NOTE(review): thread_lock_set() presumably
               * makes the chain lock this thread's td_lock for the duration of
               * the sleep (sleepq_remove_thread() compares td_lock against
               * sc_lock) -- confirm against its definition.
               */
  605         sched_sleep(td, pri);
  606         thread_lock_set(td, &sc->sc_lock);
  607         SDT_PROBE0(sched, , , sleep);
  608         TD_SET_SLEEPING(td);
  609         mi_switch(SW_VOL | SWT_SLEEPQ);
  610         KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
  611         CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
  612             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
  613 }
  614 
  615 /*
  616  * Report whether the current thread's timed sleep expired, and disarm
  617  * td_sleeptimo.  Returns EWOULDBLOCK on expiry, 0 otherwise.
  618  */
  619 static inline int
  620 sleepq_check_timeout(void)
  621 {
  622         struct thread *self;
  623         int expired;
  624 
  625         self = curthread;
  626         if (self->td_sleeptimo == 0)
  627                 return (0);
  628         expired = (self->td_sleeptimo <= sbinuptime()) ? EWOULDBLOCK : 0;
  629         /* sleepq_set_timeout_sbt() asserts this is zero for the next sleep. */
  630         self->td_sleeptimo = 0;
  631         return (expired);
  632 }
  633 
  634 /*
  635  * Check to see if we were awoken by a signal.
       *
       * Returns the current thread's td_intrval.  sleepq_add() resets that
       * field to 0 when an interruptible sleep starts, so a non-zero value
       * was stored while the thread was on the sleep queue.  TDF_SINTR must
       * already be clear (asserted).
  636  */
  637 static inline int
  638 sleepq_check_signals(void)
  639 {
  640         struct thread *td;
  641 
  642         td = curthread;
  643         KASSERT((td->td_flags & TDF_SINTR) == 0,
  644             ("thread %p still in interruptible sleep?", td));
  645 
  646         return (td->td_intrval);
  647 }
  648 
  649 /*
  650  * Block the current thread until it is awakened from its sleep queue.
       *
       * This is the non-interruptible variant: TDF_SINTR must not be set
       * (asserted).  The thread lock is taken here because sleepq_switch()
       * asserts that both it and the chain lock for 'wchan' are held.
  651  */
  652 void
  653 sleepq_wait(const void *wchan, int pri)
  654 {
  655         struct thread *td;
  656 
  657         td = curthread;
  658         MPASS(!(td->td_flags & TDF_SINTR));
  659         thread_lock(td);
  660         sleepq_switch(wchan, pri);
  661 }
  662 
  663 /*
  664  * Interruptible sleep on 'wchan'.  A non-zero result from the pre-sleep
  665  * signal check wins; otherwise return what sleepq_check_signals() reports.
  666  */
  667 int
  668 sleepq_wait_sig(const void *wchan, int pri)
  669 {
  670         int error;
  671 
  672         error = sleepq_catch_signals(wchan, pri);
  673         if (error == 0)
  674                 error = sleepq_check_signals();
  675         return (error);
  676 }
  677 
  678 /*
  679  * Block the current thread until it is awakened from its sleep queue
  680  * or it times out while waiting.
       *
       * Non-interruptible: TDF_SINTR must not be set (asserted).  Returns the
       * result of sleepq_check_timeout(), i.e. EWOULDBLOCK if the deadline in
       * td_sleeptimo has passed and 0 otherwise.
  681  */
  682 int
  683 sleepq_timedwait(const void *wchan, int pri)
  684 {
  685         struct thread *td;
  686 
  687         td = curthread;
  688         MPASS(!(td->td_flags & TDF_SINTR));
  689 
  690         thread_lock(td);
  691         sleepq_switch(wchan, pri);
  692 
  693         return (sleepq_check_timeout());
  694 }
  695 
  696 /*
  697  * Interruptible sleep with a timeout.  Result precedence: the pre-sleep
  698  * signal check, then the post-sleep signal status, then the timeout.
  699  */
  700 int
  701 sleepq_timedwait_sig(const void *wchan, int pri)
  702 {
  703         int caught, sigerr, timedout;
  704 
  705         caught = sleepq_catch_signals(wchan, pri);
  706 
  707         /* Run both checks unconditionally; the first one clears sleeptimo. */
  708         timedout = sleepq_check_timeout();
  709         sigerr = sleepq_check_signals();
  710 
  711         if (caught != 0)
  712                 return (caught);
  713         return (sigerr != 0 ? sigerr : timedout);
  714 }
  715 
  716 /*
  717  * Report the sq_type of the sleep queue attached to 'wchan', or -1 when
  718  * the channel has no sleep queue attached.
  719  */
  720 int
  721 sleepq_type(const void *wchan)
  722 {
  723         struct sleepqueue *found;
  724         int result;
  725 
  726         MPASS(wchan != NULL);
  727 
  728         result = -1;
  729         found = sleepq_lookup(wchan);
  730         if (found != NULL)
  731                 result = found->sq_type;
  732         return (result);
  733 }
  734 
  735 /*
  736  * Removes a thread from a sleep queue and makes it
  737  * runnable.
  738  *
  739  * Requires the sc chain locked on entry.  If SRQ_HOLD is specified it will
  740  * be locked on return.  Returns without the thread lock held.
       *
       * Arguments:
       *  sq       - sleep queue the thread is blocked on; td_wchan must equal
       *             sq_wchan (asserted).
       *  td       - thread to resume.
       *  pri      - 0 for no change, otherwise a priority in
       *             [PRI_MIN, PRI_MAX]; it is applied only when the thread is
       *             in the timeshare class and its td_priority is
       *             numerically greater than 'pri'.
       *  srqflags - SRQ_* flags, passed on to setrunnable().
       *
       * Returns the result of setrunnable() if the thread was sleeping, and 0
       * if it was not.
  741  */
  742 static int
  743 sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri,
  744     int srqflags)
  745 {
  746         struct sleepqueue_chain *sc;
  747         bool drop;
  748 
  749         MPASS(td != NULL);
  750         MPASS(sq->sq_wchan != NULL);
  751         MPASS(td->td_wchan == sq->sq_wchan);
  752 
  753         sc = SC_LOOKUP(sq->sq_wchan);
  754         mtx_assert(&sc->sc_lock, MA_OWNED);
  755 
  756         /*
  757          * Avoid recursing on the chain lock.  If the locks don't match we
  758          * need to acquire the thread lock which setrunnable will drop for
  759          * us.  In this case we need to drop the chain lock afterwards.
  760          *
  761          * There is no race that will make td_lock equal to sc_lock because
  762          * we hold sc_lock.
  763          */
  764         drop = false;
  765         if (!TD_IS_SLEEPING(td)) {
  766                 thread_lock(td);
  767                 drop = true;
  768         } else
  769                 thread_lock_block_wait(td);
  770 
  771         /* Remove thread from the sleepq. */
  772         sleepq_remove_thread(sq, td);
  773 
  774         /* If we're done with the sleepqueue release it. */
  775         if ((srqflags & SRQ_HOLD) == 0 && drop)
  776                 mtx_unlock_spin(&sc->sc_lock);
  777 
  778         /* Adjust priority if requested. */
  779         MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
  780         if (pri != 0 && td->td_priority > pri &&
  781             PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
  782                 sched_prio(td, pri);
  783 
  784         /*
  785          * Note that thread td might not be sleeping if it is running
  786          * sleepq_catch_signals() on another CPU or is blocked on its
  787          * proc lock to check signals.  There's no need to mark the
  788          * thread runnable in that case.
  789          */
  790         if (TD_IS_SLEEPING(td)) {
  791                 MPASS(!drop);
  792                 TD_CLR_SLEEPING(td);
  793                 return (setrunnable(td, srqflags));
  794         }
              /* Not sleeping: we took the thread lock ourselves above, so drop it. */
  795         MPASS(drop);
  796         thread_unlock(td);
  797 
  798         return (0);
  799 }
  800
      /*
       * Detach thread td from sleep queue sq.  This only undoes the queueing;
       * nothing in here clears the thread's sleeping state or makes it
       * runnable.
       *
       * Both the thread lock and the chain lock covering sq->sq_wchan must be
       * held by the caller; both are asserted below.  On return td has been
       * taken off its blocked queue, owns a sleepqueue structure again through
       * td_sleepqueue, and has td_wmesg, td_wchan, TDF_SINTR and TDF_TIMEOUT
       * cleared.
       */
  801 static void
  802 sleepq_remove_thread(struct sleepqueue *sq, struct thread *td)
  803 {
  804         struct sleepqueue_chain *sc __unused;
  805
  806         MPASS(td != NULL);
  807         MPASS(sq->sq_wchan != NULL);
  808         MPASS(td->td_wchan == sq->sq_wchan);
  809         MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0);
  810         THREAD_LOCK_ASSERT(td, MA_OWNED);
  811         sc = SC_LOOKUP(sq->sq_wchan);
  812         mtx_assert(&sc->sc_lock, MA_OWNED);
  813
  814         SDT_PROBE2(sched, , , wakeup, td, td->td_proc);
  815
  816         /* Remove the thread from the queue. */
  817         sq->sq_blockedcnt[td->td_sqqueue]--;
  818         TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
  819
  820         /*
  821          * Get a sleep queue for this thread.  If this is the last waiter,
  822          * use the queue itself and take it out of the chain, otherwise,
  823          * remove a queue from the free list.
  824          */
  825         if (LIST_EMPTY(&sq->sq_free)) {
  826                 td->td_sleepqueue = sq;
  827 #ifdef INVARIANTS
  828                 sq->sq_wchan = NULL;
  829 #endif
  830 #ifdef SLEEPQUEUE_PROFILING
  831                 sc->sc_depth--;
  832 #endif
  833         } else
  834                 td->td_sleepqueue = LIST_FIRST(&sq->sq_free);
              /*
               * Whichever structure was picked, it is linked through sq_hash,
               * so this one LIST_REMOVE() unhooks it in both cases.
               */
  835         LIST_REMOVE(td->td_sleepqueue, sq_hash);
  836
              /*
               * The callout is only stopped when no timeout has been flagged
               * yet, a timeout was actually requested, and the thread lock is
               * this chain's lock.
               */
  837         if ((td->td_flags & TDF_TIMEOUT) == 0 && td->td_sleeptimo != 0 &&
  838             td->td_lock == &sc->sc_lock) {
  839                 /*
  840                  * We ignore the situation where timeout subsystem was
  841                  * unable to stop our callout.  The struct thread is
  842                  * type-stable, the callout will use the correct
  843                  * memory when running.  The checks of the
  844                  * td_sleeptimo value in this function and in
  845                  * sleepq_timeout() ensure that the thread does not
  846                  * get spurious wakeups, even if the callout was reset
  847                  * or thread reused.
  848                  *
  849                  * We also cannot safely stop the callout if a scheduler
  850                  * lock is held since softclock_thread() forces a lock
  851                  * order of callout lock -> scheduler lock.  The thread
  852                  * lock will be a scheduler lock only if the thread is
  853                  * preparing to go to sleep, so this is hopefully a rare
  854                  * scenario.
  855                  */
  856                 callout_stop(&td->td_slpcallout);
  857         }
  858
              /* The thread is no longer tied to any wait channel. */
  859         td->td_wmesg = NULL;
  860         td->td_wchan = NULL;
  861         td->td_flags &= ~(TDF_SINTR | TDF_TIMEOUT);
  862
  863         CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)",
  864             (void *)td, (long)td->td_proc->p_pid, td->td_name);
  865 }
  866 
  867 /*
       * Take td off the sleep queue it is blocked on without resuming it.
       * The chain lock is acquired and released here; the thread lock is
       * acquired here and is still owned when the function returns.
       */
      void
      sleepq_remove_nested(struct thread *td)
      {
              const void *chan;
              struct sleepqueue_chain *chain;
              struct sleepqueue *queue;

              MPASS(TD_ON_SLEEPQ(td));

              /* Read the wait channel once; both lookups below use that value. */
              chan = td->td_wchan;
              chain = SC_LOOKUP(chan);

              /* Lock order: chain lock first, thread lock second. */
              mtx_lock_spin(&chain->sc_lock);
              queue = sleepq_lookup(chan);
              MPASS(queue != NULL);
              thread_lock(td);
              sleepq_remove_thread(queue, td);

              /* Drop only the chain lock; td stays locked for the caller. */
              mtx_unlock_spin(&chain->sc_lock);
      }
  886 
  887 #ifdef INVARIANTS
  888 /*
  889  * UMA zone item deallocator.
  890  */
  891 static void
  892 sleepq_dtor(void *mem, int size, void *arg)
  893 {
  894         struct sleepqueue *sq;
  895         int i;
  896 
  897         sq = mem;
  898         for (i = 0; i < NR_SLEEPQS; i++) {
  899                 MPASS(TAILQ_EMPTY(&sq->sq_blocked[i]));
  900                 MPASS(sq->sq_blockedcnt[i] == 0);
  901         }
  902 }
  903 #endif
  904 
  905 /*
       * UMA zone item initializer: zero the whole item, then set up the free
       * list of spare sleepqueues and, for each of the NR_SLEEPQS queues, an
       * empty wait list with a zero waiter count.  Always returns 0.
       */
      static int
      sleepq_init(void *mem, int size, int flags)
      {
              struct sleepqueue *queue;
              int idx;

              queue = mem;
              bzero(queue, size);

              LIST_INIT(&queue->sq_free);
              for (idx = 0; idx < NR_SLEEPQS; idx++) {
                      queue->sq_blockedcnt[idx] = 0;
                      TAILQ_INIT(&queue->sq_blocked[idx]);
              }

              return (0);
      }
  923 
  924 /*
       * Wake a single thread sleeping on wait channel wchan, chosen from
       * blocked queue number `queue'.
       *
       * By default the thread with the numerically lowest td_priority is taken,
       * and among equals the one closest to the head of the queue.  With
       * SLEEPQ_UNFAIR the queue is walked from the tail instead and the first
       * thread whose td_lock is the chain lock is taken; if no thread
       * qualifies the walk ends on the thread at the head.
       *
       * Unless SLEEPQ_DROP is set, SRQ_HOLD is passed to sleepq_resume_thread().
       * Returns 0 when no sleep queue exists for wchan, otherwise whatever
       * sleepq_resume_thread() returned.
       */
      int
      sleepq_signal(const void *wchan, int flags, int pri, int queue)
      {
              struct sleepqueue_chain *chain;
              struct sleepqueue *sq;
              struct threadqueue *waiters;
              struct thread *cand, *pick;
              int srqflags;

              CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
              KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
              MPASS((queue >= 0) && (queue < NR_SLEEPQS));

              sq = sleepq_lookup(wchan);
              if (sq == NULL) {
                      /* Nothing to wake, but a requested drop is still honoured. */
                      if ((flags & SLEEPQ_DROP) != 0)
                              sleepq_release(wchan);
                      return (0);
              }
              KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
                  ("%s: mismatch between sleep/wakeup and cv_*", __func__));

              waiters = &sq->sq_blocked[queue];
              if ((flags & SLEEPQ_UNFAIR) == 0) {
                      /*
                       * Fair wakeup: best priority wins.  Sleepers are always
                       * appended at the tail, so keeping the earliest entry on
                       * a tie favours the thread that has slept the longest.
                       */
                      pick = TAILQ_FIRST(waiters);
                      for (cand = TAILQ_NEXT(pick, td_slpq); cand != NULL;
                          cand = TAILQ_NEXT(cand, td_slpq)) {
                              if (cand->td_priority < pick->td_priority)
                                      pick = cand;
                      }
              } else {
                      /*
                       * Unfair wakeup: prefer the most recent sleeper, but step
                       * over threads that are still in the middle of a context
                       * switch so we do not spin on their thread lock.
                       */
                      chain = SC_LOOKUP(wchan);
                      pick = TAILQ_LAST_FAST(waiters, thread, td_slpq);
                      for (;;) {
                              if (pick->td_lock == &chain->sc_lock)
                                      break;
                              cand = TAILQ_PREV_FAST(pick, waiters, thread,
                                  td_slpq);
                              if (cand == NULL)
                                      break;
                              pick = cand;
                      }
              }
              MPASS(pick != NULL);

              srqflags = (flags & SLEEPQ_DROP) != 0 ? 0 : SRQ_HOLD;
              return (sleepq_resume_thread(sq, pick, pri, srqflags));
      }
  981
      /*
       * Predicate that accepts every thread; sleepq_broadcast() hands it to
       * sleepq_remove_matching() to wake all sleepers on a queue.
       */
  982 static bool
  983 match_any(struct thread *td __unused)
  984 {
  985
  986         return (true);
  987 }
  988 
  989 /*
       * Resume all threads sleeping on blocked queue `queue' of wait channel
       * wchan.  Returns 0 when no sleep queue exists for wchan, otherwise the
       * result of sleepq_remove_matching().  `flags' is used only for tracing
       * and for the queue type assertion.
       */
      int
      sleepq_broadcast(const void *wchan, int flags, int pri, int queue)
      {
              struct sleepqueue *found;

              CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
              KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
              MPASS((queue >= 0) && (queue < NR_SLEEPQS));

              found = sleepq_lookup(wchan);
              if (found != NULL) {
                      KASSERT(found->sq_type == (flags & SLEEPQ_TYPE),
                          ("%s: mismatch between sleep/wakeup and cv_*", __func__));
                      return (sleepq_remove_matching(found, queue, match_any, pri));
              }

              return (0);
      }
 1008 
 1009 /*
       * Resume each thread on blocked queue `queue' of sq for which matches()
       * returns true.  The return values of sleepq_resume_thread() are OR-ed
       * together and returned.
       */
      int
      sleepq_remove_matching(struct sleepqueue *sq, int queue,
          bool (*matches)(struct thread *), int pri)
      {
              struct thread *cur, *following;
              int swapper;

              swapper = 0;

              /*
               * Resuming the last waiter hands sq over to that thread, which may
               * put itself back on a queue before sleepq_resume_thread() returns.
               * The successor is therefore captured before the current thread is
               * touched.
               */
              for (cur = TAILQ_FIRST(&sq->sq_blocked[queue]); cur != NULL;
                  cur = following) {
                      following = TAILQ_NEXT(cur, td_slpq);
                      if (!matches(cur))
                              continue;
                      swapper |= sleepq_resume_thread(sq, cur, pri, SRQ_HOLD);
              }

              return (swapper);
      }
 1034 
 1035 /*
 1036  * Time sleeping threads out.  When the timeout expires, the thread is
 1037  * removed from the sleep queue and made runnable if it is still asleep.
       *
       * arg is the struct thread the timeout belongs to (presumably this is
       * the handler armed on td_slpcallout -- confirm at the arming site).
       * With the thread lock held, one of the following happens:
       *  - td_sleeptimo is 0 or later than the callout's c_time: nothing.
       *  - the thread is sleeping and on a sleep queue: TDF_TIMEOUT is set
       *    and the thread is resumed; kick_proc0() is called if
       *    sleepq_resume_thread() returns nonzero.
       *  - the thread is on a sleep queue but not sleeping yet: only
       *    TDF_TIMEOUT is set.
 1038  */
 1039 static void
 1040 sleepq_timeout(void *arg)
 1041 {
 1042         struct sleepqueue_chain *sc __unused;
 1043         struct sleepqueue *sq;
 1044         struct thread *td;
 1045         const void *wchan;
 1046         int wakeup_swapper;
 1047
 1048         td = arg;
 1049         CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
 1050             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
 1051
 1052         thread_lock(td);
 1053         if (td->td_sleeptimo == 0 ||
 1054             td->td_sleeptimo > td->td_slpcallout.c_time) {
 1055                 /*
 1056                  * The thread does not want a timeout (yet).
 1057                  */
 1058         } else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
 1059                 /*
 1060                  * See if the thread is asleep and get the wait
 1061                  * channel if it is.
 1062                  */
 1063                 wchan = td->td_wchan;
 1064                 sc = SC_LOOKUP(wchan);
 1065                 THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
 1066                 sq = sleepq_lookup(wchan);
 1067                 MPASS(sq != NULL);
 1068                 td->td_flags |= TDF_TIMEOUT;
 1069                 wakeup_swapper = sleepq_resume_thread(sq, td, 0, 0);
 1070                 if (wakeup_swapper)
 1071                         kick_proc0();
                      /*
                       * No thread_unlock() on this path: the thread lock is
                       * not released here.  NOTE(review): this relies on
                       * sleepq_resume_thread() having dropped it -- confirm.
                       */
 1072                 return;
 1073         } else if (TD_ON_SLEEPQ(td)) {
 1074                 /*
 1075                  * If the thread is on the SLEEPQ but isn't sleeping
 1076                  * yet, it can either be on another CPU in between
 1077                  * sleepq_add() and one of the sleepq_*wait*()
 1078                  * routines or it can be in sleepq_catch_signals().
 1079                  */
 1080                 td->td_flags |= TDF_TIMEOUT;
 1081         }
 1082         thread_unlock(td);
 1083 }
 1084 
 1085 /*
       * Resume thread td if, and only if, it is currently queued on the sleep
       * queue for wait channel wchan; otherwise do nothing.  kick_proc0() is
       * called when sleepq_resume_thread() returns nonzero.
       */
      void
      sleepq_remove(struct thread *td, const void *wchan)
      {
              struct sleepqueue_chain *chain;
              struct sleepqueue *queue;

              /*
               * Lock the chain for this wait channel first, then re-check under
               * that lock that the thread really sleeps on the channel.
               */
              MPASS(wchan != NULL);
              chain = SC_LOOKUP(wchan);
              mtx_lock_spin(&chain->sc_lock);

              /*
               * The thread itself cannot be locked at this point since it may
               * be asleep on some other sleep queue.  Holding the chain lock
               * for wchan is enough to not miss a wakeup on this channel, and
               * the assertions below catch false positives.
               */
              if (TD_ON_SLEEPQ(td) && td->td_wchan == wchan) {
                      /* td sleeps on this channel's queue, so wake it up. */
                      queue = sleepq_lookup(wchan);
                      MPASS(queue != NULL);
                      MPASS(td->td_wchan == wchan);
                      if (sleepq_resume_thread(queue, td, 0, 0) != 0)
                              kick_proc0();
                      return;
              }

              /* Not asleep on wchan: just drop the chain lock and bail. */
              mtx_unlock_spin(&chain->sc_lock);
      }
 1123 
 1124 /*
       * Abort the sleep of td as if an interrupt had occurred.  Only
       * interruptible waits may be aborted (aborting others is not safe), which
       * is why TDF_SINTR is asserted.
       *
       * Requires the thread lock on entry and releases it on return.  intrval
       * is recorded in td_intrval unless a timeout is already flagged, in
       * which case nothing is changed.  Returns the result of
       * sleepq_resume_thread() when the thread was asleep and had to be woken
       * here, 0 otherwise.
       */
      int
      sleepq_abort(struct thread *td, int intrval)
      {
              struct sleepqueue *queue;
              const void *chan;

              THREAD_LOCK_ASSERT(td, MA_OWNED);
              MPASS(TD_ON_SLEEPQ(td));
              MPASS(td->td_flags & TDF_SINTR);
              MPASS((intrval == 0 && (td->td_flags & TDF_SIGWAIT) != 0) ||
                  intrval == EINTR || intrval == ERESTART);

              /* A timeout is already on its way; leave the thread to it. */
              if ((td->td_flags & TDF_TIMEOUT) != 0) {
                      thread_unlock(td);
                      return (0);
              }

              CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)",
                  (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
              td->td_intrval = intrval;

              if (TD_IS_SLEEPING(td)) {
                      /* Thread is asleep on its sleep queue, so wake it up. */
                      chan = td->td_wchan;
                      MPASS(chan != NULL);
                      queue = sleepq_lookup(chan);
                      MPASS(queue != NULL);
                      return (sleepq_resume_thread(queue, td, 0, 0));
              }

              /*
               * The thread has not gone to sleep yet.  It will notice the
               * signal in sleepq_catch_signals() and call sleepq_resume_thread
               * itself, so only the lock needs releasing here.
               */
              thread_unlock(td);
              return (0);
      }
 1172 
 1173 /*
       * Walk every sleep queue chain and resume all threads, on any blocked
       * queue, for which matches() returns true.  kick_proc0() is called once
       * at the end if any sleepq_remove_matching() call returned nonzero.
       */
      void
      sleepq_chains_remove_matching(bool (*matches)(struct thread *))
      {
              struct sleepqueue_chain *chain;
              struct sleepqueue *sq, *sq_next;
              int q, slot, swapper;

              swapper = 0;
              for (slot = 0; slot < SC_TABLESIZE; slot++) {
                      chain = &sleepq_chains[slot];

                      /* Unlocked peek: chains with no queues are skipped. */
                      if (LIST_EMPTY(&chain->sc_queues))
                              continue;

                      mtx_lock_spin(&chain->sc_lock);
                      LIST_FOREACH_SAFE(sq, &chain->sc_queues, sq_hash, sq_next) {
                              for (q = 0; q < NR_SLEEPQS; q++)
                                      swapper |= sleepq_remove_matching(sq, q,
                                          matches, 0);
                      }
                      mtx_unlock_spin(&chain->sc_lock);
              }

              if (swapper != 0)
                      kick_proc0();
      }
 1198 
 1199 /*
       * Prints the stacks of all threads presently sleeping on wchan/queue to
       * the sbuf sb.  Sets *count_stacks_printed (if the pointer is not NULL)
       * to the number of stacks actually printed: 0 if nothing was printed,
       * and fewer than the number of sleeping threads if sb overflowed before
       * all stacks were printed.
       *
       * Returns 0 on success, ENOENT if there is no sleep queue for wchan,
       * ENOMEM if even the largest attempt could not hold every sleeper, or
       * the error reported by sbuf_error(sb) after printing.
       */
      #ifdef STACK
      int
      sleepq_sbuf_print_stacks(struct sbuf *sb, const void *wchan, int queue,
          int *count_stacks_printed)
      {
              struct thread *td;
              struct sleepqueue *sq;
              struct stack **st;
              struct sbuf **td_infos;
              int attempt, error, printed, stack_idx, stacks_to_allocate;
              bool finished;

              error = 0;
              finished = false;

              KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
              MPASS((queue >= 0) && (queue < NR_SLEEPQS));

              /* Give the caller a well-defined count on every return path. */
              if (count_stacks_printed != NULL)
                      *count_stacks_printed = 0;

              stacks_to_allocate = 10;
              for (attempt = 0; attempt < 3 && !finished; attempt++) {
                      /*
                       * We cannot malloc while holding the queue's spinlock, so
                       * we do our mallocs now, and hope it is enough.  If it
                       * isn't, we will free these, drop the lock, malloc more,
                       * and try again, up to a point.  After that point we will
                       * give up and report ENOMEM.  We also cannot write to sb
                       * during this time since the client may have set the
                       * SBUF_AUTOEXTEND flag on their sbuf, which could cause a
                       * malloc as we print to it.  So we defer actually printing
                       * to sb until after we drop the spinlock.
                       */

                      /* Where we will store the stacks. */
                      st = malloc(sizeof(struct stack *) * stacks_to_allocate,
                          M_TEMP, M_WAITOK);
                      for (stack_idx = 0; stack_idx < stacks_to_allocate;
                          stack_idx++)
                              st[stack_idx] = stack_create(M_WAITOK);

                      /* Where we will store the td name, tid, etc. */
                      td_infos = malloc(sizeof(struct sbuf *) * stacks_to_allocate,
                          M_TEMP, M_WAITOK);
                      for (stack_idx = 0; stack_idx < stacks_to_allocate;
                          stack_idx++)
                              td_infos[stack_idx] = sbuf_new(NULL, NULL,
                                  MAXCOMLEN + sizeof(struct thread *) * 2 + 40,
                                  SBUF_FIXEDLEN);

                      sleepq_lock(wchan);
                      sq = sleepq_lookup(wchan);
                      if (sq == NULL) {
                              /* This sleepq does not exist; exit and return ENOENT. */
                              error = ENOENT;
                              finished = true;
                              sleepq_release(wchan);
                              goto loop_end;
                      }

                      stack_idx = 0;
                      /*
                       * Save thread info.  Nothing is removed from the queue
                       * here, so the plain iterator is sufficient.
                       */
                      TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) {
                              /* Out of room: retry with a larger allocation. */
                              if (stack_idx >= stacks_to_allocate)
                                      goto loop_end;

                              /* Note the td_lock is equal to the sleepq_lock here. */
                              (void)stack_save_td(st[stack_idx], td);

                              sbuf_printf(td_infos[stack_idx], "%d: %s %p",
                                  td->td_tid, td->td_name, td);

                              ++stack_idx;
                      }

                      finished = true;
                      sleepq_release(wchan);

                      /*
                       * Print the stacks.  A dedicated index is used so the
                       * retry counter of the enclosing loop is left alone, and
                       * the reported count only advances past stacks that made
                       * it into sb without an error.
                       */
                      for (printed = 0; printed < stack_idx; printed++) {
                              sbuf_finish(td_infos[printed]);
                              sbuf_printf(sb, "--- thread %s: ---\n",
                                  sbuf_data(td_infos[printed]));
                              stack_sbuf_print(sb, st[printed]);
                              sbuf_printf(sb, "\n");

                              error = sbuf_error(sb);
                              if (error != 0)
                                      break;
                              if (count_stacks_printed != NULL)
                                      *count_stacks_printed = printed + 1;
                      }

      loop_end:
                      /* Still locked only when we bailed out for lack of room. */
                      if (!finished)
                              sleepq_release(wchan);
                      for (stack_idx = 0; stack_idx < stacks_to_allocate;
                          stack_idx++)
                              stack_destroy(st[stack_idx]);
                      for (stack_idx = 0; stack_idx < stacks_to_allocate;
                          stack_idx++)
                              sbuf_delete(td_infos[stack_idx]);
                      free(st, M_TEMP);
                      free(td_infos, M_TEMP);
                      stacks_to_allocate *= 10;
              }

              if (!finished && error == 0)
                      error = ENOMEM;

              return (error);
      }
      #endif
 1313 
 1314 #ifdef SLEEPQUEUE_PROFILING
      /* Number of records in the static sleepq_profent[] pool. */
 1315 #define SLEEPQ_PROF_LOCATIONS   1024
      /* Length passed to sbuf_new_for_sysctl() when dumping the statistics. */
 1316 #define SLEEPQ_SBUFSIZE         512
      /*
       * One profiling record.  Records are looked up by comparing the wmesg
       * pointer itself, and sp_count is bumped each time sleepq_profile() is
       * called with that pointer while profiling is enabled.
       */
 1317 struct sleepq_prof {
 1318         LIST_ENTRY(sleepq_prof) sp_link;
 1319         const char      *sp_wmesg;
 1320         long            sp_count;
 1321 };
 1322
 1323 LIST_HEAD(sqphead, sleepq_prof);
 1324
      /* Records not yet assigned to a wmesg. */
 1325 struct sqphead sleepq_prof_free;
      /* Assigned records, bucketed by SC_HASH() of the wmesg pointer. */
 1326 struct sqphead sleepq_hash[SC_TABLESIZE];
      /* Backing storage for every record on the two lists above. */
 1327 static struct sleepq_prof sleepq_profent[SLEEPQ_PROF_LOCATIONS];
      /* Spin mutex taken around updates of the profiling state. */
 1328 static struct mtx sleepq_prof_lock;
 1329 MTX_SYSINIT(sleepq_prof_lock, &sleepq_prof_lock, "sleepq_prof", MTX_SPIN);
 1330 
 1331 /*
       * Count one sleep on wmesg.  Does nothing while profiling is disabled.
       * A record for a wmesg seen for the first time is taken from the free
       * list; if that list is empty the sleep goes uncounted.
       */
      static void
      sleepq_profile(const char *wmesg)
      {
              struct sqphead *bucket;
              struct sleepq_prof *rec;

              mtx_lock_spin(&sleepq_prof_lock);
              if (prof_enabled != 0) {
                      /* Records are matched on the wmesg pointer, not its text. */
                      bucket = &sleepq_hash[SC_HASH(wmesg)];
                      LIST_FOREACH(rec, bucket, sp_link) {
                              if (rec->sp_wmesg == wmesg)
                                      break;
                      }

                      /* rec is NULL here when the bucket held no match. */
                      if (rec == NULL) {
                              rec = LIST_FIRST(&sleepq_prof_free);
                              if (rec != NULL) {
                                      rec->sp_wmesg = wmesg;
                                      LIST_REMOVE(rec, sp_link);
                                      LIST_INSERT_HEAD(bucket, rec, sp_link);
                              }
                      }

                      if (rec != NULL)
                              rec->sp_count++;
              }
              mtx_unlock_spin(&sleepq_prof_lock);
      }
 1354 
 1355 /*
       * Throw away all profiling data: empty every hash bucket and put every
       * record of the static pool, cleared, back on the free list.  Profiling
       * is switched off for the duration and its previous setting restored,
       * all under sleepq_prof_lock.
       */
      static void
      sleepq_prof_reset(void)
      {
              struct sleepq_prof *rec;
              int idx, saved;

              mtx_lock_spin(&sleepq_prof_lock);
              saved = prof_enabled;
              prof_enabled = 0;

              for (idx = 0; idx < SC_TABLESIZE; idx++)
                      LIST_INIT(&sleepq_hash[idx]);

              LIST_INIT(&sleepq_prof_free);
              for (rec = sleepq_profent;
                  rec < sleepq_profent + SLEEPQ_PROF_LOCATIONS; rec++) {
                      rec->sp_count = 0;
                      rec->sp_wmesg = NULL;
                      LIST_INSERT_HEAD(&sleepq_prof_free, rec, sp_link);
              }

              prof_enabled = saved;
              mtx_unlock_spin(&sleepq_prof_lock);
      }
 1377 
 1378 /*
       * Sysctl handler for the profiling on/off switch.  Reads report the
       * current value of prof_enabled.  A write that changes the value stores
       * it normalised to 0 or 1; a written value of exactly 1 also resets the
       * collected statistics first.
       */
      static int
      enable_sleepq_prof(SYSCTL_HANDLER_ARGS)
      {
              int error, newval;

              newval = prof_enabled;
              error = sysctl_handle_int(oidp, &newval, newval, req);
              if (error != 0 || req->newptr == NULL)
                      return (error);

              if (newval != prof_enabled) {
                      if (newval == 1)
                              sleepq_prof_reset();
                      mtx_lock_spin(&sleepq_prof_lock);
                      prof_enabled = (newval != 0);
                      mtx_unlock_spin(&sleepq_prof_lock);
              }

              return (0);
      }
 1399 
 1400 /*
       * Sysctl handler that resets the profiling statistics when a nonzero
       * value is written.  Reads always report 0.
       */
      static int
      reset_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
      {
              int error, trigger;

              trigger = 0;
              error = sysctl_handle_int(oidp, &trigger, 0, req);
              if (error != 0 || req->newptr == NULL)
                      return (error);

              if (trigger != 0)
                      sleepq_prof_reset();

              return (0);
      }
 1417 
 1418 /*
       * Sysctl handler that reports the profiling table as text: a header
       * followed by one "wmesg<TAB>count" line per record found in the hash
       * buckets.
       *
       * Profiling is switched off while the table is walked, so that
       * sleepq_profile() leaves it alone, and the previous setting is put back
       * afterwards.  The walk itself runs without sleepq_prof_lock held.
       * NOTE(review): prof_enabled is sampled before the lock is taken, so a
       * concurrent change of the setting can be overwritten by the restore.
       */
      static int
      dump_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
      {
              struct sqphead *bucket;
              struct sleepq_prof *rec;
              struct sbuf *out;
              int error, saved;

              error = sysctl_wire_old_buffer(req, 0);
              if (error != 0)
                      return (error);

              out = sbuf_new_for_sysctl(NULL, NULL, SLEEPQ_SBUFSIZE, req);
              sbuf_printf(out, "\nwmesg\tcount\n");

              saved = prof_enabled;
              mtx_lock_spin(&sleepq_prof_lock);
              prof_enabled = 0;
              mtx_unlock_spin(&sleepq_prof_lock);

              for (bucket = &sleepq_hash[0]; bucket < &sleepq_hash[SC_TABLESIZE];
                  bucket++) {
                      LIST_FOREACH(rec, bucket, sp_link) {
                              sbuf_printf(out, "%s\t%ld\n",
                                  rec->sp_wmesg, rec->sp_count);
                      }
              }

              mtx_lock_spin(&sleepq_prof_lock);
              prof_enabled = saved;
              mtx_unlock_spin(&sleepq_prof_lock);

              error = sbuf_finish(out);
              sbuf_delete(out);
              return (error);
      }
 1450
      /*
       * Sysctl nodes for the profiling code, all children of _debug_sleepq.
       * "stats" is a read-only string produced by dump_sleepq_prof_stats() and
       * is the only one flagged CTLFLAG_NEEDGIANT.
       */
 1451 SYSCTL_PROC(_debug_sleepq, OID_AUTO, stats,
 1452     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, NULL, 0,
 1453     dump_sleepq_prof_stats, "A",
 1454     "Sleepqueue profiling statistics");
      /* "reset": read-write integer handled by reset_sleepq_prof_stats(). */
 1455 SYSCTL_PROC(_debug_sleepq, OID_AUTO, reset,
 1456     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
 1457     reset_sleepq_prof_stats, "I",
 1458     "Reset sleepqueue profiling statistics");
      /* "enable": read-write integer handled by enable_sleepq_prof(). */
 1459 SYSCTL_PROC(_debug_sleepq, OID_AUTO, enable,
 1460     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
 1461     enable_sleepq_prof, "I",
 1462     "Enable sleepqueue profiling");
 1463 #endif
 1464 
 1465 #ifdef DDB
 1466 DB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
 1467 {
 1468         struct sleepqueue_chain *sc;
 1469         struct sleepqueue *sq;
 1470 #ifdef INVARIANTS
 1471         struct lock_object *lock;
 1472 #endif
 1473         struct thread *td;
 1474         void *wchan;
 1475         int i;
 1476 
 1477         if (!have_addr)
 1478                 return;
 1479 
 1480         /*
 1481          * First, see if there is an active sleep queue for the wait channel
 1482          * indicated by the address.
 1483          */
 1484         wchan = (void *)addr;
 1485         sc = SC_LOOKUP(wchan);
 1486         LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
 1487                 if (sq->sq_wchan == wchan)
 1488                         goto found;
 1489 
 1490         /*
 1491          * Second, see if there is an active sleep queue at the address
 1492          * indicated.
 1493          */
 1494         for (i = 0; i < SC_TABLESIZE; i++)
 1495                 LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) {
 1496                         if (sq == (struct sleepqueue *)addr)
 1497                                 goto found;
 1498                 }
 1499 
 1500         db_printf("Unable to locate a sleep queue via %p\n", (void *)addr);
 1501         return;
 1502 found:
 1503         db_printf("Wait channel: %p\n", sq->sq_wchan);
 1504         db_printf("Queue type: %d\n", sq->sq_type);
 1505 #ifdef INVARIANTS
 1506         if (sq->sq_lock) {
 1507                 lock = sq->sq_lock;
 1508                 db_printf("Associated Interlock: %p - (%s) %s\n", lock,
 1509                     LOCK_CLASS(lock)->lc_name, lock->lo_name);
 1510         }
 1511 #endif
 1512         db_printf("Blocked threads:\n");
 1513         for (i = 0; i < NR_SLEEPQS; i++) {
 1514                 db_printf("\nQueue[%d]:\n", i);
 1515                 if (TAILQ_EMPTY(&sq->sq_blocked[i]))
 1516                         db_printf("\tempty\n");
 1517                 else
 1518                         TAILQ_FOREACH(td, &sq->sq_blocked[i],
 1519                                       td_slpq) {
 1520                                 db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td,
 1521                                           td->td_tid, td->td_proc->p_pid,
 1522                                           td->td_name);
 1523                         }
 1524                 db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]);
 1525         }
 1526 }
 1527 
 1528 /* Alias 'show sleepqueue' to 'show sleepq'. */
 1529 DB_SHOW_ALIAS(sleepqueue, db_show_sleepqueue);
 1530 #endif

Cache object: 574cd4cf4911763aa96a27a757f99c8c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.