The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_sleepqueue.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 /*
   29  * Implementation of sleep queues used to hold queue of threads blocked on
   30  * a wait channel.  Sleep queues are different from turnstiles in that wait
   31  * channels are not owned by anyone, so there is no priority propagation.
   32  * Sleep queues can also provide a timeout and can also be interrupted by
   33  * signals.  That said, there are several similarities between the turnstile
   34  * and sleep queue implementations.  (Note: turnstiles were implemented
   35  * first.)  For example, both use a hash table of the same size where each
   36  * bucket is referred to as a "chain" that contains both a spin lock and
   37  * a linked list of queues.  An individual queue is located by using a hash
   38  * to pick a chain, locking the chain, and then walking the chain searching
   39  * for the queue.  This means that a wait channel object does not need to
   40  * embed its queue head just as locks do not embed their turnstile queue
   41  * head.  Threads also carry around a sleep queue that they lend to the
   42  * wait channel when blocking.  Just as in turnstiles, the queue includes
   43  * a free list of the sleep queues of other threads blocked on the same
   44  * wait channel in the case of multiple waiters.
   45  *
   46  * Some additional functionality provided by sleep queues includes the
   47  * ability to set a timeout.  The timeout is managed using a per-thread
   48  * callout that resumes a thread if it is asleep.  A thread may also
   49  * catch signals while it is asleep (aka an interruptible sleep).  The
   50  * signal code uses sleepq_abort() to interrupt a sleeping thread.  Finally,
   51  * sleep queues also provide some extra assertions.  One is not allowed to
   52  * mix the sleep/wakeup and cv APIs for a given wait channel.  Also, one
   53  * must consistently use the same lock to synchronize with a wait channel,
   54  * though this check is currently only a warning for sleep/wakeup due to
   55  * pre-existing abuse of that API.  The same lock must also be held when
   56  * awakening threads, though that is currently only enforced for condition
   57  * variables.
   58  */
   59 
   60 #include <sys/cdefs.h>
   61 __FBSDID("$FreeBSD$");
   62 
   63 #include "opt_sleepqueue_profiling.h"
   64 #include "opt_ddb.h"
   65 #include "opt_sched.h"
   66 #include "opt_stack.h"
   67 
   68 #include <sys/param.h>
   69 #include <sys/systm.h>
   70 #include <sys/lock.h>
   71 #include <sys/kernel.h>
   72 #include <sys/ktr.h>
   73 #include <sys/mutex.h>
   74 #include <sys/proc.h>
   75 #include <sys/sbuf.h>
   76 #include <sys/sched.h>
   77 #include <sys/sdt.h>
   78 #include <sys/signalvar.h>
   79 #include <sys/sleepqueue.h>
   80 #include <sys/stack.h>
   81 #include <sys/sysctl.h>
   82 #include <sys/time.h>
   83 #ifdef EPOCH_TRACE
   84 #include <sys/epoch.h>
   85 #endif
   86 
   87 #include <machine/atomic.h>
   88 
   89 #include <vm/uma.h>
   90 
   91 #ifdef DDB
   92 #include <ddb/ddb.h>
   93 #endif
   94 
   95 /*
   96  * Constants for the hash table of sleep queue chains.
   97  * SC_TABLESIZE must be a power of two for SC_MASK to work properly.
       *
       * SC_TABLESIZE is only defined here when the build has not already
       * supplied a value; the CTASSERT below rejects a value that is not a
       * power of two at compile time.
       *
       * SC_HASH() XORs the wait channel address shifted right by SC_SHIFT bits
       * with the unshifted address and masks the result with SC_MASK to obtain
       * an index into sleepq_chains[].  SC_LOOKUP() expands to the address of
       * the chain selected by that index; note that the expansion itself is not
       * wrapped in parentheses.
       *
       * NR_SLEEPQS is the number of blocked-thread lists (and matching
       * counters) carried by every sleep queue.
   98  */
   99 #ifndef SC_TABLESIZE
  100 #define SC_TABLESIZE    256
  101 #endif
  102 CTASSERT(powerof2(SC_TABLESIZE));
  103 #define SC_MASK         (SC_TABLESIZE - 1)
  104 #define SC_SHIFT        8
  105 #define SC_HASH(wc)     ((((uintptr_t)(wc) >> SC_SHIFT) ^ (uintptr_t)(wc)) & \
  106                             SC_MASK)
  107 #define SC_LOOKUP(wc)   &sleepq_chains[SC_HASH(wc)]
  108 #define NR_SLEEPQS      2
  109 /*
  110  * There are two different lists of sleep queues.  Both lists are connected
  111  * via the sq_hash entries.  The first list is the sleep queue chain list
  112  * that a sleep queue is on when it is attached to a wait channel.  The
  113  * second list is the free list hung off of a sleep queue that is attached
  114  * to a wait channel.
  115  *
  116  * Each sleep queue also contains the wait channel it is attached to, the
  117  * list of threads blocked on that wait channel, flags specific to the
  118  * wait channel, and the lock used to synchronize with a wait channel.
  119  * The flags are used to catch mismatches between the various consumers
  120  * of the sleep queue API (e.g. sleep/wakeup and condition variables).
  121  * The lock pointer is only used when invariants are enabled for various
  122  * debugging checks.
       *
       * sq_blocked[] and sq_blockedcnt[] are both indexed by the queue number
       * passed to sleepq_add().  A count is incremented when a thread is
       * appended to the matching list in sleepq_add() and decremented when the
       * thread is unlinked in sleepq_remove_thread().
       *
       * sq_type holds the SLEEPQ_TYPE bits of the flags given by the first
       * thread to sleep on the wait channel.  sq_lock exists only in INVARIANTS
       * builds.
  123  *
  124  * Locking key:
  125  *  c - sleep queue chain lock
  126  */
  127 struct sleepqueue {
  128         struct threadqueue sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */
  129         u_int sq_blockedcnt[NR_SLEEPQS];        /* (c) N. of blocked threads. */
  130         LIST_ENTRY(sleepqueue) sq_hash;         /* (c) Chain and free list. */
  131         LIST_HEAD(, sleepqueue) sq_free;        /* (c) Free queues. */
  132         const void      *sq_wchan;              /* (c) Wait channel. */
  133         int     sq_type;                        /* (c) Queue type. */
  134 #ifdef INVARIANTS
  135         struct lock_object *sq_lock;            /* (c) Associated lock. */
  136 #endif
  137 };
  138 
  139 struct sleepqueue_chain {
              /*
               * One bucket of the sleepq_chains[] hash table.  sc_queues
               * links, through sq_hash, every sleep queue currently attached
               * to a wait channel that hashes to this bucket; sc_lock is the
               * spin mutex taken by sleepq_lock() and dropped by
               * sleepq_release().
               *
               * With SLEEPQUEUE_PROFILING, sc_depth is incremented when
               * sleepq_add() links a new queue into the chain and decremented
               * when sleepq_remove_thread() hands the queue back to the last
               * waiter; sc_max_depth records the largest sc_depth seen.
               *
               * The structure is aligned to CACHE_LINE_SIZE.
               */
  140         LIST_HEAD(, sleepqueue) sc_queues;      /* List of sleep queues. */
  141         struct mtx sc_lock;                     /* Spin lock for this chain. */
  142 #ifdef SLEEPQUEUE_PROFILING
  143         u_int   sc_depth;                       /* Length of sc_queues. */
  144         u_int   sc_max_depth;                   /* Max length of sc_queues. */
  145 #endif
  146 } __aligned(CACHE_LINE_SIZE);
  147 
  148 #ifdef SLEEPQUEUE_PROFILING
      /*
       * Profiling state, built only with SLEEPQUEUE_PROFILING.  Two static
       * sysctl nodes are declared (sleepq under _debug, chains under
       * _debug_sleepq); the per-chain children of the latter are added at run
       * time by init_sleepqueue_profiling().  sleepq_max_depth is the largest
       * sc_max_depth reached by any chain and is updated in sleepq_add().
       */
  149 static SYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  150     "sleepq profiling");
  151 static SYSCTL_NODE(_debug_sleepq, OID_AUTO, chains,
  152     CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  153     "sleepq chain stats");
  154 static u_int sleepq_max_depth;
  155 SYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth,
  156     0, "maximum depth achieved of a single chain");
  157 
      /* sleepq_switch() calls sleepq_profile() only while prof_enabled is set. */
  158 static void     sleepq_profile(const char *wmesg);
  159 static int      prof_enabled;
  160 #endif
      /* The hash table of chains, indexed by SC_HASH(wchan). */
  161 static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
      /* UMA zone backing sleepq_alloc()/sleepq_free(); set in init_sleepqueues(). */
  162 static uma_zone_t sleepq_zone;
  163 
  164 /*
  165  * Prototypes for non-exported routines.
       *
       * sleepq_dtor() is declared only in INVARIANTS builds, matching its use
       * in init_sleepqueues().
  166  */
  167 static int      sleepq_catch_signals(const void *wchan, int pri);
  168 static inline int sleepq_check_signals(void);
  169 static inline int sleepq_check_timeout(void);
  170 #ifdef INVARIANTS
  171 static void     sleepq_dtor(void *mem, int size, void *arg);
  172 #endif
  173 static int      sleepq_init(void *mem, int size, int flags);
  174 static int      sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
  175                     int pri, int srqflags);
  176 static void     sleepq_remove_thread(struct sleepqueue *sq, struct thread *td);
  177 static void     sleepq_switch(const void *wchan, int pri);
  178 static void     sleepq_timeout(void *arg);
  179 
      /*
       * Scheduler probes used in this file: "sleep" is fired from
       * sleepq_switch() and "wakeup" from sleepq_remove_thread().
       */
  180 SDT_PROBE_DECLARE(sched, , , sleep);
  181 SDT_PROBE_DECLARE(sched, , , wakeup);
  182 
  183 /*
  184  * Initialize SLEEPQUEUE_PROFILING specific sysctl nodes.
  185  * Note that it must happen after sleepinit() has been fully executed, so
  186  * it must happen after SI_SUB_KMEM SYSINIT() subsystem setup.
       *
       * For every chain index i in [0, SC_TABLESIZE) this adds a read-only node
       * named with the decimal value of i below the static "chains" node, and
       * gives it two read-only unsigned leaves, "depth" and "max_depth", that
       * point directly at that chain's sc_depth and sc_max_depth counters.
       *
       * NOTE(review): the SYSINIT below registers at SI_SUB_LOCK; confirm that
       * this orders after SI_SUB_KMEM as the requirement above demands.
  187  */
  188 #ifdef SLEEPQUEUE_PROFILING
  189 static void
  190 init_sleepqueue_profiling(void)
  191 {
  192         char chain_name[10];
  193         struct sysctl_oid *chain_oid;
  194         u_int i;
  195 
  196         for (i = 0; i < SC_TABLESIZE; i++) {
                      /* The node is named after the chain's index. */
  197                 snprintf(chain_name, sizeof(chain_name), "%u", i);
  198                 chain_oid = SYSCTL_ADD_NODE(NULL,
  199                     SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO,
  200                     chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
  201                     "sleepq chain stats");
                      /* Export the live counters of chain i under that node. */
  202                 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  203                     "depth", CTLFLAG_RD, &sleepq_chains[i].sc_depth, 0, NULL);
  204                 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  205                     "max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0,
  206                     NULL);
  207         }
  208 }
  209 
  210 SYSINIT(sleepqueue_profiling, SI_SUB_LOCK, SI_ORDER_ANY,
  211     init_sleepqueue_profiling, NULL);
  212 #endif
  213 
  214 /*
  215  * Early initialization of sleep queues that is called from the sleepinit()
  216  * SYSINIT.
       *
       * Initializes the list head and the spin mutex of every chain in
       * sleepq_chains[], creates the "SLEEPQUEUE" UMA zone used by
       * sleepq_alloc()/sleepq_free(), and allocates the sleep queue for
       * thread0.  sleepq_init() is always registered with the zone;
       * sleepq_dtor() is registered only in INVARIANTS builds.
  217  */
  218 void
  219 init_sleepqueues(void)
  220 {
  221         int i;
  222 
  223         for (i = 0; i < SC_TABLESIZE; i++) {
  224                 LIST_INIT(&sleepq_chains[i].sc_queues);
  225                 mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
  226                     MTX_SPIN);
  227         }
              /* The two variants below differ only in passing sleepq_dtor. */
  228         sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
  229 #ifdef INVARIANTS
  230             NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
  231 #else
  232             NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
  233 #endif
  234 
              /* The zone now exists, so thread0 can be given its queue. */
  235         thread0.td_sleepqueue = sleepq_alloc();
  236 }
  237 
  238 /*
  239  * Get a sleep queue for a new thread.
       *
       * Returns an item allocated from sleepq_zone.  The allocation is
       * requested with M_WAITOK.
  240  */
  241 struct sleepqueue *
  242 sleepq_alloc(void)
  243 {
  244 
  245         return (uma_zalloc(sleepq_zone, M_WAITOK));
  246 }
  247 
  248 /*
  249  * Free a sleep queue when a thread is destroyed.
       *
       * Returns 'sq' to sleepq_zone, the zone sleepq_alloc() allocates from.
  250  */
  251 void
  252 sleepq_free(struct sleepqueue *sq)
  253 {
  254 
  255         uma_zfree(sleepq_zone, sq);
  256 }
  257 
  258 /*
  259  * Lock the sleep queue chain associated with the specified wait channel.
       *
       * Acquires the spin mutex of the chain that 'wchan' hashes to.
       * sleepq_release() with the same wait channel is the matching unlock.
  260  */
  261 void
  262 sleepq_lock(const void *wchan)
  263 {
  264         struct sleepqueue_chain *sc;
  265 
  266         sc = SC_LOOKUP(wchan);
  267         mtx_lock_spin(&sc->sc_lock);
  268 }
  269 
  270 /*
  271  * Look up the sleep queue associated with a given wait channel in the hash
  272  * table.  This function does not take the chain lock itself: the caller
  273  * must already hold the lock of the associated sleep queue chain, which is
       * asserted below.  If no queue is found in the table, NULL is returned.
       *
       * 'wchan' must not be NULL.
  274  */
  275 struct sleepqueue *
  276 sleepq_lookup(const void *wchan)
  277 {
  278         struct sleepqueue_chain *sc;
  279         struct sleepqueue *sq;
  280 
  281         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  282         sc = SC_LOOKUP(wchan);
  283         mtx_assert(&sc->sc_lock, MA_OWNED);
              /* Walk the chain for the queue attached to exactly this wchan. */
  284         LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
  285                 if (sq->sq_wchan == wchan)
  286                         return (sq);
  287         return (NULL);
  288 }
  289 
  290 /*
  291  * Unlock the sleep queue chain associated with a given wait channel.
       *
       * Releases the spin mutex of the chain that 'wchan' hashes to, i.e. the
       * mutex sleepq_lock() acquires for the same wait channel.
  292  */
  293 void
  294 sleepq_release(const void *wchan)
  295 {
  296         struct sleepqueue_chain *sc;
  297 
  298         sc = SC_LOOKUP(wchan);
  299         mtx_unlock_spin(&sc->sc_lock);
  300 }
  301 
  302 /*
  303  * Places the current thread on the sleep queue for the specified wait
  304  * channel.  If INVARIANTS is enabled, then it associates the passed in
  305  * lock with the sleepq to make sure it is held when that sleep queue is
  306  * woken up.
       *
       * The caller must hold the sleep queue chain lock for 'wchan' (asserted
       * below) and curthread must currently own a sleep queue.
       *
       *  wchan - wait channel to sleep on; must not be NULL.
       *  lock  - lock associated with the wait channel; recorded in sq_lock in
       *          INVARIANTS builds and compared against an existing queue's
       *          sq_lock via MPASS.
       *  wmesg - stored in td_wmesg.
       *  flags - the SLEEPQ_TYPE bits become the sq_type of a newly attached
       *          queue and are checked against sq_type of an existing one;
       *          SLEEPQ_INTERRUPTIBLE additionally sets TDF_SINTR and zeroes
       *          td_intrval.
       *  queue - index of the blocked list to join, 0 <= queue < NR_SLEEPQS.
       *
       * On return td_sleepqueue is NULL: the thread's queue has either been
       * attached to the wait channel or parked on the free list of the queue
       * already attached to it.
  307  */
  308 void
  309 sleepq_add(const void *wchan, struct lock_object *lock, const char *wmesg,
  310     int flags, int queue)
  311 {
  312         struct sleepqueue_chain *sc;
  313         struct sleepqueue *sq;
  314         struct thread *td;
  315 
  316         td = curthread;
  317         sc = SC_LOOKUP(wchan);
  318         mtx_assert(&sc->sc_lock, MA_OWNED);
  319         MPASS(td->td_sleepqueue != NULL);
  320         MPASS(wchan != NULL);
  321         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  322 
  323         /* If this thread is not allowed to sleep, die a horrible death. */
  324         if (__predict_false(!THREAD_CAN_SLEEP())) {
  325 #ifdef EPOCH_TRACE
  326                 epoch_trace_list(curthread);
  327 #endif
  328                 KASSERT(0,
  329                     ("%s: td %p to sleep on wchan %p with sleeping prohibited",
  330                     __func__, td, wchan));
  331         }
  332 
  333         /* Look up the sleep queue associated with the wait channel 'wchan'. */
  334         sq = sleepq_lookup(wchan);
  335 
  336         /*
  337          * If the wait channel does not already have a sleep queue, use
  338          * this thread's sleep queue.  Otherwise, insert the current thread
  339          * into the sleep queue already in use by this wait channel.
  340          */
  341         if (sq == NULL) {
  342 #ifdef INVARIANTS
  343                 int i;
  344 
                      /*
                       * Sanity-check that the thread's own queue is pristine
                       * before lending it to the wait channel.
                       */
  345                 sq = td->td_sleepqueue;
  346                 for (i = 0; i < NR_SLEEPQS; i++) {
  347                         KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
  348                             ("thread's sleep queue %d is not empty", i));
  349                         KASSERT(sq->sq_blockedcnt[i] == 0,
  350                             ("thread's sleep queue %d count mismatches", i));
  351                 }
  352                 KASSERT(LIST_EMPTY(&sq->sq_free),
  353                     ("thread's sleep queue has a non-empty free list"));
  354                 KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
  355                 sq->sq_lock = lock;
  356 #endif
  357 #ifdef SLEEPQUEUE_PROFILING
                      /* A queue is being added to this chain; track depths. */
  358                 sc->sc_depth++;
  359                 if (sc->sc_depth > sc->sc_max_depth) {
  360                         sc->sc_max_depth = sc->sc_depth;
  361                         if (sc->sc_max_depth > sleepq_max_depth)
  362                                 sleepq_max_depth = sc->sc_max_depth;
  363                 }
  364 #endif
                      /* Attach the thread's queue to the chain for 'wchan'. */
  365                 sq = td->td_sleepqueue;
  366                 LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
  367                 sq->sq_wchan = wchan;
  368                 sq->sq_type = flags & SLEEPQ_TYPE;
  369         } else {
  370                 MPASS(wchan == sq->sq_wchan);
  371                 MPASS(lock == sq->sq_lock);
  372                 MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
                      /* Park the thread's spare queue on the free list. */
  373                 LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
  374         }
              /* The remaining thread state is updated under the thread lock. */
  375         thread_lock(td);
  376         TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
  377         sq->sq_blockedcnt[queue]++;
  378         td->td_sleepqueue = NULL;
  379         td->td_sqqueue = queue;
  380         td->td_wchan = wchan;
  381         td->td_wmesg = wmesg;
  382         if (flags & SLEEPQ_INTERRUPTIBLE) {
  383                 td->td_intrval = 0;
  384                 td->td_flags |= TDF_SINTR;
  385         }
  386         td->td_flags &= ~TDF_TIMEOUT;
  387         thread_unlock(td);
  388 }
  389 
  390 /*
  391  * Sets a timeout that will remove the current thread from the
  392  * specified sleep queue at the specified time if the thread has not
  393  * already been awakened.  Flags are from C_* (callout) namespace.
       *
       * The caller must hold the sleep queue chain lock for 'wchan' and
       * curthread must already be on a sleep queue (both asserted below); no
       * timeout may already be pending, i.e. td_sleeptimo must be 0.
       *
       *  sbt, pr, flags - passed to callout_when(), which fills in
       *                   td_sleeptimo and the precision actually used.
       *
       * The per-thread callout td_slpcallout is then armed to run
       * sleepq_timeout() for this thread on the current CPU, with C_PRECALC
       * and C_DIRECT_EXEC added to 'flags'.
  394  */
  395 void
  396 sleepq_set_timeout_sbt(const void *wchan, sbintime_t sbt, sbintime_t pr,
  397     int flags)
  398 {
  399         struct sleepqueue_chain *sc __unused;
  400         struct thread *td;
  401         sbintime_t pr1;
  402 
  403         td = curthread;
  404         sc = SC_LOOKUP(wchan);
  405         mtx_assert(&sc->sc_lock, MA_OWNED);
  406         MPASS(TD_ON_SLEEPQ(td));
  407         MPASS(td->td_sleepqueue == NULL);
  408         MPASS(wchan != NULL);
  409         if (cold && td == &thread0)
  410                 panic("timed sleep before timers are working");
  411         KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx",
  412             td->td_tid, td, (uintmax_t)td->td_sleeptimo));
              /* td_sleeptimo is written under the thread lock. */
  413         thread_lock(td);
  414         callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1);
  415         thread_unlock(td);
  416         callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1,
  417             sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC |
  418             C_DIRECT_EXEC);
  419 }
  420 
  421 /*
  422  * Return the number of actual sleepers for the specified queue.
       *
       * 'wchan' must not be NULL and 'queue' must satisfy
       * 0 <= queue < NR_SLEEPQS.  The chain lock for 'wchan' must be held,
       * as asserted by sleepq_lookup().  Returns 0 when no sleep queue is
       * attached to the wait channel, otherwise sq_blockedcnt[queue].
  423  */
  424 u_int
  425 sleepq_sleepcnt(const void *wchan, int queue)
  426 {
  427         struct sleepqueue *sq;
  428 
  429         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  430         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  431         sq = sleepq_lookup(wchan);
  432         if (sq == NULL)
  433                 return (0);
  434         return (sq->sq_blockedcnt[queue]);
  435 }
  436 
  437 static int
  438 sleepq_check_ast_sc_locked(struct thread *td, struct sleepqueue_chain *sc)
  439 {
              /*
               * Check 'td' for a pending TDP_WAKEUP request, signal check or
               * suspension check before it goes to sleep.
               *
               * Called with the chain lock 'sc' held (asserted below).  Every
               * return path leaves with both the chain lock and the thread
               * lock held; the slow path drops both temporarily in order to
               * take the process lock.
               *
               * Returns EINTR if TDP_WAKEUP was set (the flag is cleared), 0
               * if neither TDF_NEEDSIGCHK nor TDF_NEEDSUSPCHK is set,
               * otherwise the nonzero result of sig_ast_checksusp() or, if
               * that is 0, the result of sig_ast_needsigchk().
               */
  440         struct proc *p;
  441         int ret;
  442 
  443         mtx_assert(&sc->sc_lock, MA_OWNED);
  444 
  445         if ((td->td_pflags & TDP_WAKEUP) != 0) {
  446                 td->td_pflags &= ~TDP_WAKEUP;
                      /* Take the thread lock so all returns look alike. */
  447                 thread_lock(td);
  448                 return (EINTR);
  449         }
  450 
  451         /*
  452          * See if there are any pending signals or suspension requests for this
  453          * thread.  If not, we can switch immediately.
  454          */
  455         thread_lock(td);
  456         if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) == 0)
  457                 return (0);
  458 
              /* Slow path: drop both spin locks before taking PROC_LOCK. */
  459         thread_unlock(td);
  460         mtx_unlock_spin(&sc->sc_lock);
  461 
  462         p = td->td_proc;
  463         CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)",
  464             (void *)td, (long)p->p_pid, td->td_name);
  465         PROC_LOCK(p);
  466 
  467         /*
  468          * Check for suspension first. Checking for signals and then
  469          * suspending could result in a missed signal, since a signal
  470          * can be delivered while this thread is suspended.
  471          */
  472         ret = sig_ast_checksusp(td);
  473         if (ret != 0) {
                      /* Re-take chain lock, then thread lock, and report. */
  474                 PROC_UNLOCK(p);
  475                 mtx_lock_spin(&sc->sc_lock);
  476                 thread_lock(td);
  477                 return (ret);
  478         }
  479 
  480         ret = sig_ast_needsigchk(td);
  481 
  482         /*
  483          * Lock the per-process spinlock prior to dropping the
  484          * PROC_LOCK to avoid a signal delivery race.
  485          * PROC_LOCK, PROC_SLOCK, and thread_lock() are
  486          * currently held in tdsendsignal() and thread_single().
  487          */
  488         PROC_SLOCK(p);
  489         mtx_lock_spin(&sc->sc_lock);
  490         PROC_UNLOCK(p);
  491         thread_lock(td);
  492         PROC_SUNLOCK(p);
  493 
  494         return (ret);
  495 }
  496 
  497 /*
  498  * Makes an initial check for pending signals and suspension requests
  499  * before putting the current thread to sleep on 'wchan'.
  500  *
  501  * Called with the sleep queue chain lock for 'wchan' held (asserted
       * below); the thread lock is acquired by sleepq_check_ast_sc_locked().
       *
       * If that check returns 0, control passes to sleepq_switch() with both
       * locks held and 0 is returned afterwards.  Otherwise the thread is
       * removed from the sleep queue if it is still on it, the chain lock and
       * the thread lock are both released, and the nonzero value is returned.
       *
       * NOTE(review): this comment used to say the function "enters and exits
       * with the thread lock held"; the error path below visibly releases it.
       * Confirm the lock state left by the sleepq_switch() path.
  502  */
  503 static int
  504 sleepq_catch_signals(const void *wchan, int pri)
  505 {
  506         struct thread *td;
  507         struct sleepqueue_chain *sc;
  508         struct sleepqueue *sq;
  509         int ret;
  510 
  511         sc = SC_LOOKUP(wchan);
  512         mtx_assert(&sc->sc_lock, MA_OWNED);
  513         MPASS(wchan != NULL);
  514         td = curthread;
  515 
  516         ret = sleepq_check_ast_sc_locked(td, sc);
  517         THREAD_LOCK_ASSERT(td, MA_OWNED);
  518         mtx_assert(&sc->sc_lock, MA_OWNED);
  519 
  520         if (ret == 0) {
  521                 /*
  522                  * No pending signals and no suspension requests found.
  523                  * Switch the thread off the cpu.
  524                  */
  525                 sleepq_switch(wchan, pri);
  526         } else {
  527                 /*
  528                  * There were pending signals and this thread is still
  529                  * on the sleep queue, remove it from the sleep queue.
  530                  */
  531                 if (TD_ON_SLEEPQ(td)) {
  532                         sq = sleepq_lookup(wchan);
  533                         sleepq_remove_thread(sq, td);
  534                 }
                      /* The two locks are distinct here; drop each of them. */
  535                 MPASS(td->td_lock != &sc->sc_lock);
  536                 mtx_unlock_spin(&sc->sc_lock);
  537                 thread_unlock(td);
  538         }
  539         return (ret);
  540 }
  541 
  542 /*
  543  * Switches to another thread if we are still asleep on a sleep queue.
  544  *
       * Called with both the sleep queue chain lock for 'wchan' and the thread
       * lock held (asserted below).  'pri' is handed to sched_sleep().
       *
       * The early-return paths (already woken up, already timed out, or RTC
       * adjusted) release both locks before returning.  On the sleeping path
       * the thread lock is switched to the chain lock with thread_lock_set()
       * before mi_switch() is called.
       *
       * NOTE(review): this comment used to read "Returns with thread lock";
       * the early returns below drop it and sleepq_wait() does not unlock
       * after calling this function, so confirm what mi_switch() leaves held.
  545  */
  546 static void
  547 sleepq_switch(const void *wchan, int pri)
  548 {
  549         struct sleepqueue_chain *sc;
  550         struct sleepqueue *sq;
  551         struct thread *td;
  552         bool rtc_changed;
  553 
  554         td = curthread;
  555         sc = SC_LOOKUP(wchan);
  556         mtx_assert(&sc->sc_lock, MA_OWNED);
  557         THREAD_LOCK_ASSERT(td, MA_OWNED);
  558 
  559         /*
  560          * If we have a sleep queue, then we've already been woken up, so
  561          * just return.
  562          */
  563         if (td->td_sleepqueue != NULL) {
  564                 mtx_unlock_spin(&sc->sc_lock);
  565                 thread_unlock(td);
  566                 return;
  567         }
  568 
  569         /*
  570          * If TDF_TIMEOUT is set, then our sleep has been timed out
  571          * already but we are still on the sleep queue, so dequeue the
  572          * thread and return.
  573          *
  574          * Do the same if the real-time clock has been adjusted since this
  575          * thread calculated its timeout based on that clock.  This handles
  576          * the following race:
  577          * - The Ts thread needs to sleep until an absolute real-clock time.
  578          *   It copies the global rtc_generation into curthread->td_rtcgen,
  579          *   reads the RTC, and calculates a sleep duration based on that time.
  580          *   See umtxq_sleep() for an example.
  581          * - The Tc thread adjusts the RTC, bumps rtc_generation, and wakes
  582          *   threads that are sleeping until an absolute real-clock time.
  583          *   See tc_setclock() and the POSIX specification of clock_settime().
  584          * - Ts reaches the code below.  It holds the sleepqueue chain lock,
  585          *   so Tc has finished waking, so this thread must test td_rtcgen.
  586          * (The declaration of td_rtcgen refers to this comment.)
  587          */
  588         rtc_changed = td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation;
  589         if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) {
  590                 if (rtc_changed) {
  591                         td->td_rtcgen = 0;
  592                 }
  593                 MPASS(TD_ON_SLEEPQ(td));
  594                 sq = sleepq_lookup(wchan);
  595                 sleepq_remove_thread(sq, td);
  596                 mtx_unlock_spin(&sc->sc_lock);
  597                 thread_unlock(td);
  598                 return;
  599         }
  600 #ifdef SLEEPQUEUE_PROFILING
  601         if (prof_enabled)
  602                 sleepq_profile(td->td_wmesg);
  603 #endif
              /* Still queued and not timed out: really go to sleep. */
  604         MPASS(td->td_sleepqueue == NULL);
  605         sched_sleep(td, pri);
  606         thread_lock_set(td, &sc->sc_lock);
  607         SDT_PROBE0(sched, , , sleep);
  608         TD_SET_SLEEPING(td);
  609         mi_switch(SW_VOL | SWT_SLEEPQ);
              /* Execution resumes here once the thread runs again. */
  610         KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
  611         CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
  612             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
  613 }
  614 
  615 /*
  616  * Check to see if we timed out.
       *
       * If the current thread has a timeout recorded (td_sleeptimo != 0),
       * returns EWOULDBLOCK when that time is at or before the current
       * sbinuptime() and 0 otherwise, and in either case resets td_sleeptimo
       * to 0.  Returns 0 when no timeout was recorded.
  617  */
  618 static inline int
  619 sleepq_check_timeout(void)
  620 {
  621         struct thread *td;
  622         int res;
  623 
  624         res = 0;
  625         td = curthread;
  626         if (td->td_sleeptimo != 0) {
  627                 if (td->td_sleeptimo <= sbinuptime())
  628                         res = EWOULDBLOCK;
                      /* Always clear it so a later sleep starts clean. */
  629                 td->td_sleeptimo = 0;
  630         }
  631         return (res);
  632 }
  633 
  634 /*
  635  * Check to see if we were awoken by a signal.
       *
       * Returns the current thread's td_intrval, which sleepq_add() zeroes
       * when an interruptible sleep is set up.  TDF_SINTR must already have
       * been cleared by the time this is called (asserted below).
  636  */
  637 static inline int
  638 sleepq_check_signals(void)
  639 {
  640         struct thread *td;
  641 
  642         td = curthread;
  643         KASSERT((td->td_flags & TDF_SINTR) == 0,
  644             ("thread %p still in interruptible sleep?", td));
  645 
  646         return (td->td_intrval);
  647 }
  648 
  649 /*
  650  * Block the current thread until it is awakened from its sleep queue.
       *
       * The sleep must not have been set up as interruptible (TDF_SINTR clear,
       * asserted below).  Takes the thread lock and hands off to
       * sleepq_switch(), which asserts that the chain lock for 'wchan' is
       * held; 'pri' is passed through unchanged.
  651  */
  652 void
  653 sleepq_wait(const void *wchan, int pri)
  654 {
  655         struct thread *td;
  656 
  657         td = curthread;
  658         MPASS(!(td->td_flags & TDF_SINTR));
  659         thread_lock(td);
  660         sleepq_switch(wchan, pri);
  661 }
  662 
  663 /*
  664  * Block the current thread until it is awakened from its sleep queue
  665  * or it is interrupted by a signal.
       *
       * Returns the nonzero value from sleepq_catch_signals() if the initial
       * check produced one; otherwise returns sleepq_check_signals(), i.e.
       * the thread's td_intrval.
  666  */
  667 int
  668 sleepq_wait_sig(const void *wchan, int pri)
  669 {
  670         int rcatch;
  671 
  672         rcatch = sleepq_catch_signals(wchan, pri);
  673         if (rcatch)
  674                 return (rcatch);
  675         return (sleepq_check_signals());
  676 }
  677 
  678 /*
  679  * Block the current thread until it is awakened from its sleep queue
  680  * or it times out while waiting.
       *
       * The sleep must not have been set up as interruptible (TDF_SINTR clear,
       * asserted below).  Returns the result of sleepq_check_timeout():
       * EWOULDBLOCK if the recorded timeout has passed, otherwise 0.
  681  */
  682 int
  683 sleepq_timedwait(const void *wchan, int pri)
  684 {
  685         struct thread *td;
  686 
  687         td = curthread;
  688         MPASS(!(td->td_flags & TDF_SINTR));
  689 
  690         thread_lock(td);
  691         sleepq_switch(wchan, pri);
  692 
  693         return (sleepq_check_timeout());
  694 }
  695 
  696 /*
  697  * Block the current thread until it is awakened from its sleep queue,
  698  * it is interrupted by a signal, or it times out waiting to be awakened.
       *
       * All three results are always collected; the first nonzero one in the
       * order sleepq_catch_signals(), sleepq_check_signals(),
       * sleepq_check_timeout() is returned, so a signal result takes
       * precedence over a timeout.
  699  */
  700 int
  701 sleepq_timedwait_sig(const void *wchan, int pri)
  702 {
  703         int rcatch, rvalt, rvals;
  704 
  705         rcatch = sleepq_catch_signals(wchan, pri);
  706         /* We must always call check_timeout() to clear sleeptimo. */
  707         rvalt = sleepq_check_timeout();
  708         rvals = sleepq_check_signals();
  709         if (rcatch)
  710                 return (rcatch);
  711         if (rvals)
  712                 return (rvals);
  713         return (rvalt);
  714 }
  715 
  716 /*
  717  * Returns the type of sleepqueue given a waitchannel.
       *
       * 'wchan' must not be NULL and its chain lock must be held, as asserted
       * by sleepq_lookup().  Returns -1 when no sleep queue is attached to
       * the wait channel, otherwise the queue's sq_type.
  718  */
  719 int
  720 sleepq_type(const void *wchan)
  721 {
  722         struct sleepqueue *sq;
  723         int type;
  724 
  725         MPASS(wchan != NULL);
  726 
  727         sq = sleepq_lookup(wchan);
  728         if (sq == NULL)
  729                 return (-1);
  730         type = sq->sq_type;
  731 
  732         return (type);
  733 }
  734 
  735 /*
  736  * Removes a thread from a sleep queue and makes it
  737  * runnable.
  738  *
  739  * Requires the sc chain locked on entry.  If SRQ_HOLD is specified it will
  740  * be locked on return.  Returns without the thread lock held.
       *
       *  sq       - sleep queue the thread is blocked on; td_wchan must equal
       *             sq_wchan.
       *  td       - thread to resume; must not be NULL.
       *  pri      - 0 to leave the priority alone; otherwise a value in
       *             [PRI_MIN, PRI_MAX] that is applied with sched_prio() only
       *             if td_priority is numerically greater than it and the
       *             thread's base class is PRI_TIMESHARE.
       *  srqflags - passed to setrunnable(); SRQ_HOLD also suppresses the
       *             chain unlock performed here.
       *
       * Returns the result of setrunnable() when the thread was sleeping, and
       * 0 when it was not, in which case it is only dequeued.
  741  */
  742 static int
  743 sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri,
  744     int srqflags)
  745 {
  746         struct sleepqueue_chain *sc;
  747         bool drop;
  748 
  749         MPASS(td != NULL);
  750         MPASS(sq->sq_wchan != NULL);
  751         MPASS(td->td_wchan == sq->sq_wchan);
  752 
  753         sc = SC_LOOKUP(sq->sq_wchan);
  754         mtx_assert(&sc->sc_lock, MA_OWNED);
  755 
  756         /*
  757          * Avoid recursing on the chain lock.  If the locks don't match we
  758          * need to acquire the thread lock which setrunnable will drop for
  759          * us.  In this case we need to drop the chain lock afterwards.
  760          *
  761          * There is no race that will make td_lock equal to sc_lock because
  762          * we hold sc_lock.
  763          */
  764         drop = false;
  765         if (!TD_IS_SLEEPING(td)) {
  766                 thread_lock(td);
  767                 drop = true;
  768         } else
  769                 thread_lock_block_wait(td);
  770 
  771         /* Remove thread from the sleepq. */
  772         sleepq_remove_thread(sq, td);
  773 
  774         /* If we're done with the sleepqueue release it. */
  775         if ((srqflags & SRQ_HOLD) == 0 && drop)
  776                 mtx_unlock_spin(&sc->sc_lock);
  777 
  778         /* Adjust priority if requested. */
  779         MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
  780         if (pri != 0 && td->td_priority > pri &&
  781             PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
  782                 sched_prio(td, pri);
  783 
  784         /*
  785          * Note that thread td might not be sleeping if it is running
  786          * sleepq_catch_signals() on another CPU or is blocked on its
  787          * proc lock to check signals.  There's no need to mark the
  788          * thread runnable in that case.
  789          */
  790         if (TD_IS_SLEEPING(td)) {
  791                 MPASS(!drop);
  792                 TD_CLR_SLEEPING(td);
  793                 return (setrunnable(td, srqflags));
  794         }
              /* Not sleeping: we took the thread lock above, so drop it. */
  795         MPASS(drop);
  796         thread_unlock(td);
  797 
  798         return (0);
  799 }
  800 
      /*
       * Detach thread td from sleep queue sq without making it runnable.
       *
       * The assertions require td to be recorded as waiting on sq's wait
       * channel and require both the thread lock and the chain lock for
       * that channel to be owned on entry.  The thread is unlinked from
       * its blocked list, handed a sleepqueue structure to carry away,
       * has its sleep callout stopped when one may be pending, and has
       * its wait-channel state cleared.
       */
  801 static void
  802 sleepq_remove_thread(struct sleepqueue *sq, struct thread *td)
  803 {
              /*
               * sc is only referenced by the lock assertion and by the
               * SLEEPQUEUE_PROFILING depth counter, hence __unused.
               */
  804         struct sleepqueue_chain *sc __unused;
  805 
  806         MPASS(td != NULL);
  807         MPASS(sq->sq_wchan != NULL);
  808         MPASS(td->td_wchan == sq->sq_wchan);
  809         MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0);
  810         THREAD_LOCK_ASSERT(td, MA_OWNED);
  811         sc = SC_LOOKUP(sq->sq_wchan);
  812         mtx_assert(&sc->sc_lock, MA_OWNED);
  813 
  814         SDT_PROBE2(sched, , , wakeup, td, td->td_proc);
  815 
  816         /* Remove the thread from the queue. */
  817         sq->sq_blockedcnt[td->td_sqqueue]--;
  818         TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
  819 
  820         /*
  821          * Get a sleep queue for this thread.  If this is the last waiter,
  822          * use the queue itself and take it out of the chain, otherwise,
  823          * remove a queue from the free list.
  824          */
  825         if (LIST_EMPTY(&sq->sq_free)) {
  826                 td->td_sleepqueue = sq;
  827 #ifdef INVARIANTS
  828                 sq->sq_wchan = NULL;
  829 #endif
  830 #ifdef SLEEPQUEUE_PROFILING
  831                 sc->sc_depth--;
  832 #endif
  833         } else
  834                 td->td_sleepqueue = LIST_FIRST(&sq->sq_free);
              /* Unlink whichever structure was chosen via its sq_hash entry. */
  835         LIST_REMOVE(td->td_sleepqueue, sq_hash);
  836 
  837         if ((td->td_flags & TDF_TIMEOUT) == 0 && td->td_sleeptimo != 0)
  838                 /*
  839                  * We ignore the situation where timeout subsystem was
  840                  * unable to stop our callout.  The struct thread is
  841                  * type-stable, the callout will use the correct
  842                  * memory when running.  The checks of the
  843                  * td_sleeptimo value in this function and in
  844                  * sleepq_timeout() ensure that the thread does not
  845                  * get spurious wakeups, even if the callout was reset
  846                  * or thread reused.
  847                  */
  848                 callout_stop(&td->td_slpcallout);
  849 
              /* The thread no longer waits on anything; drop the wait state. */
  850         td->td_wmesg = NULL;
  851         td->td_wchan = NULL;
  852         td->td_flags &= ~(TDF_SINTR | TDF_TIMEOUT);
  853 
  854         CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)",
  855             (void *)td, (long)td->td_proc->p_pid, td->td_name);
  856 }
  857 
      /*
       * Remove td from the sleep queue of the wait channel recorded in
       * td->td_wchan.  The thread must be on a sleep queue (asserted).
       *
       * The chain lock is taken first, then the thread lock; only the
       * chain lock is dropped before returning, so the caller gets td
       * back with its thread lock held.  The thread is not made runnable
       * here.
       */
  858 void
  859 sleepq_remove_nested(struct thread *td)
  860 {
  861         struct sleepqueue_chain *sc;
  862         struct sleepqueue *sq;
  863         const void *wchan;
  864 
  865         MPASS(TD_ON_SLEEPQ(td));
  866 
  867         wchan = td->td_wchan;
  868         sc = SC_LOOKUP(wchan);
  869         mtx_lock_spin(&sc->sc_lock);
  870         sq = sleepq_lookup(wchan);
  871         MPASS(sq != NULL);
  872         thread_lock(td);
  873         sleepq_remove_thread(sq, td);
  874         mtx_unlock_spin(&sc->sc_lock);
  875         /* Returns with the thread lock owned. */
  876 }
  877 
  878 #ifdef INVARIANTS
  879 /*
  880  * UMA zone item deallocator.
       *
       * Only built with INVARIANTS.  It frees nothing itself; it asserts that
       * every blocked list of the sleepqueue is empty and every blocked count
       * is zero.  The size and arg parameters are not used.
  881  */
  882 static void
  883 sleepq_dtor(void *mem, int size, void *arg)
  884 {
  885         struct sleepqueue *sq;
  886         int i;
  887 
  888         sq = mem;
  889         for (i = 0; i < NR_SLEEPQS; i++) {
  890                 MPASS(TAILQ_EMPTY(&sq->sq_blocked[i]));
  891                 MPASS(sq->sq_blockedcnt[i] == 0);
  892         }
  893 }
  894 #endif
  895 
  896 /*
  897  * UMA zone item initializer.
       *
       * Zeroes the size bytes at mem, then initializes each of the NR_SLEEPQS
       * blocked lists and counters and the free list of the sleepqueue stored
       * there.  The flags parameter is not used.  Always returns 0.
  898  */
  899 static int
  900 sleepq_init(void *mem, int size, int flags)
  901 {
  902         struct sleepqueue *sq;
  903         int i;
  904 
  905         bzero(mem, size);
  906         sq = mem;
  907         for (i = 0; i < NR_SLEEPQS; i++) {
  908                 TAILQ_INIT(&sq->sq_blocked[i]);
  909                 sq->sq_blockedcnt[i] = 0;
  910         }
  911         LIST_INIT(&sq->sq_free);
  912         return (0);
  913 }
  914 
  915 /*
  916  * Find thread sleeping on a wait channel and resume it.
       *
       * wchan must be non-NULL and queue must be in [0, NR_SLEEPQS).  One
       * thread is chosen from sq_blocked[queue] and passed, together with
       * pri, to sleepq_resume_thread().
       *
       * flags:
       *   SLEEPQ_TYPE bits must match the queue's sq_type (KASSERT).
       *   SLEEPQ_UNFAIR selects from the tail of the queue instead of by
       *     priority.
       *   SLEEPQ_DROP calls sleepq_release() when no queue exists, and
       *     otherwise resumes the thread without SRQ_HOLD.
       *
       * Returns 0 when no sleep queue exists for wchan, otherwise the value
       * returned by sleepq_resume_thread().
       *
       * This function does not acquire the chain lock itself.
       * NOTE(review): callers presumably hold it on entry -- confirm.
  917  */
  918 int
  919 sleepq_signal(const void *wchan, int flags, int pri, int queue)
  920 {
  921         struct sleepqueue_chain *sc;
  922         struct sleepqueue *sq;
  923         struct threadqueue *head;
  924         struct thread *td, *besttd;
  925         int wakeup_swapper;
  926 
  927         CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
  928         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  929         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  930         sq = sleepq_lookup(wchan);
  931         if (sq == NULL) {
  932                 if (flags & SLEEPQ_DROP)
  933                         sleepq_release(wchan);
  934                 return (0);
  935         }
  936         KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
  937             ("%s: mismatch between sleep/wakeup and cv_*", __func__));
  938 
              /*
               * NOTE(review): both selection strategies below dereference the
               * first/last element of head before the MPASS(besttd != NULL)
               * check, so they appear to assume the requested queue is not
               * empty -- confirm against callers.
               */
  939         head = &sq->sq_blocked[queue];
  940         if (flags & SLEEPQ_UNFAIR) {
  941                 /*
  942                  * Find the most recently sleeping thread, but try to
  943                  * skip threads still in process of context switch to
  944                  * avoid spinning on the thread lock.
  945                  */
  946                 sc = SC_LOOKUP(wchan);
  947                 besttd = TAILQ_LAST_FAST(head, thread, td_slpq);
                      /*
                       * Walk towards the head until a thread whose td_lock
                       * is the chain lock is found.  If none is found the
                       * walk stops on the thread at the head of the queue.
                       */
  948                 while (besttd->td_lock != &sc->sc_lock) {
  949                         td = TAILQ_PREV_FAST(besttd, head, thread, td_slpq);
  950                         if (td == NULL)
  951                                 break;
  952                         besttd = td;
  953                 }
  954         } else {
  955                 /*
  956                  * Find the highest priority thread on the queue.  If there
  957                  * is a tie, use the thread that first appears in the queue
  958                  * as it has been sleeping the longest since threads are
  959                  * always added to the tail of sleep queues.
  960                  */
  961                 besttd = td = TAILQ_FIRST(head);
  962                 while ((td = TAILQ_NEXT(td, td_slpq)) != NULL) {
  963                         if (td->td_priority < besttd->td_priority)
  964                                 besttd = td;
  965                 }
  966         }
  967         MPASS(besttd != NULL);
  968         wakeup_swapper = sleepq_resume_thread(sq, besttd, pri,
  969             (flags & SLEEPQ_DROP) ? 0 : SRQ_HOLD);
  970         return (wakeup_swapper);
  971 }
  972 
      /*
       * Predicate for sleepq_remove_matching() that accepts every thread;
       * sleepq_broadcast() uses it to resume all waiters on a queue.
       */
  973 static bool
  974 match_any(struct thread *td __unused)
  975 {
  976 
  977         return (true);
  978 }
  979 
  980 /*
  981  * Resume all threads sleeping on a specified wait channel.
       *
       * wchan must be non-NULL and queue must be in [0, NR_SLEEPQS).  The
       * SLEEPQ_TYPE bits of flags must match the queue's sq_type (KASSERT);
       * no other flag bits are examined here.
       *
       * Returns 0 when no sleep queue exists for wchan, otherwise the value
       * returned by sleepq_remove_matching() for sq_blocked[queue] with a
       * predicate that matches every thread.
  982  */
  983 int
  984 sleepq_broadcast(const void *wchan, int flags, int pri, int queue)
  985 {
  986         struct sleepqueue *sq;
  987 
  988         CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
  989         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  990         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  991         sq = sleepq_lookup(wchan);
  992         if (sq == NULL)
  993                 return (0);
  994         KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
  995             ("%s: mismatch between sleep/wakeup and cv_*", __func__));
  996 
  997         return (sleepq_remove_matching(sq, queue, match_any, pri));
  998 }
  999 
 1000 /*
 1001  * Resume threads on the sleep queue that match the given predicate.
       *
       * Every thread on sq->sq_blocked[queue] for which matches() returns
       * true is passed to sleepq_resume_thread() with priority pri and
       * SRQ_HOLD.  The return value is the bitwise OR of the values returned
       * by those calls (0 when nothing matched).
 1002  */
 1003 int
 1004 sleepq_remove_matching(struct sleepqueue *sq, int queue,
 1005     bool (*matches)(struct thread *), int pri)
 1006 {
 1007         struct thread *td, *tdn;
 1008         int wakeup_swapper;
 1009 
 1010         /*
 1011          * The last thread will be given ownership of sq and may
 1012          * re-enqueue itself before sleepq_resume_thread() returns,
 1013          * so we must cache the "next" queue item at the beginning
 1014          * of the final iteration.
 1015          */
 1016         wakeup_swapper = 0;
 1017         TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
 1018                 if (matches(td))
 1019                         wakeup_swapper |= sleepq_resume_thread(sq, td, pri,
 1020                             SRQ_HOLD);
 1021         }
 1022 
 1023         return (wakeup_swapper);
 1024 }
 1025 
 1026 /*
 1027  * Time sleeping threads out.  When the timeout expires, the thread is
 1028  * removed from the sleep queue and made runnable if it is still asleep.
       *
       * arg is the struct thread whose sleep timed out.  With the thread
       * lock held, one of three things happens:
       *  - td_sleeptimo is 0 or later than the callout's c_time: nothing
       *    is done;
       *  - the thread is asleep on a sleep queue: TDF_TIMEOUT is set and the
       *    thread is resumed through sleepq_resume_thread();
       *  - the thread is on a sleep queue but not yet asleep: only
       *    TDF_TIMEOUT is set.
 1029  */
 1030 static void
 1031 sleepq_timeout(void *arg)
 1032 {
 1033         struct sleepqueue_chain *sc __unused;
 1034         struct sleepqueue *sq;
 1035         struct thread *td;
 1036         const void *wchan;
 1037         int wakeup_swapper;
 1038 
 1039         td = arg;
 1040         CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
 1041             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
 1042 
 1043         thread_lock(td);
 1044         if (td->td_sleeptimo == 0 ||
 1045             td->td_sleeptimo > td->td_slpcallout.c_time) {
 1046                 /*
 1047                  * The thread does not want a timeout (yet).
 1048                  */
 1049         } else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
 1050                 /*
 1051                  * See if the thread is asleep and get the wait
 1052                  * channel if it is.
 1053                  */
 1054                 wchan = td->td_wchan;
 1055                 sc = SC_LOOKUP(wchan);
                      /* Here the thread lock is expected to be the chain lock. */
 1056                 THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
 1057                 sq = sleepq_lookup(wchan);
 1058                 MPASS(sq != NULL);
 1059                 td->td_flags |= TDF_TIMEOUT;
 1060                 wakeup_swapper = sleepq_resume_thread(sq, td, 0, 0);
 1061                 if (wakeup_swapper)
 1062                         kick_proc0();
                      /*
                       * No thread_unlock() on this path: the lock is left to
                       * sleepq_resume_thread() to release.
                       */
 1063                 return;
 1064         } else if (TD_ON_SLEEPQ(td)) {
 1065                 /*
 1066                  * If the thread is on the SLEEPQ but isn't sleeping
 1067                  * yet, it can either be on another CPU in between
 1068                  * sleepq_add() and one of the sleepq_*wait*()
 1069                  * routines or it can be in sleepq_catch_signals().
 1070                  */
 1071                 td->td_flags |= TDF_TIMEOUT;
 1072         }
 1073         thread_unlock(td);
 1074 }
 1075 
 1076 /*
 1077  * Resumes a specific thread from the sleep queue associated with a specific
 1078  * wait channel if it is on that queue.
       *
       * wchan must be non-NULL.  The chain lock for wchan is acquired here.
       * If td is not on a sleep queue, or is waiting on a different channel,
       * the lock is dropped and nothing else happens.  Otherwise the thread
       * is resumed with sleepq_resume_thread() (no priority change, no
       * SRQ_HOLD) and kick_proc0() is called if that returns non-zero.
 1079  */
 1080 void
 1081 sleepq_remove(struct thread *td, const void *wchan)
 1082 {
 1083         struct sleepqueue_chain *sc;
 1084         struct sleepqueue *sq;
 1085         int wakeup_swapper;
 1086 
 1087         /*
 1088          * Look up the sleep queue for this wait channel, then re-check
 1089          * that the thread is asleep on that channel, if it is not, then
 1090          * bail.
 1091          */
 1092         MPASS(wchan != NULL);
 1093         sc = SC_LOOKUP(wchan);
 1094         mtx_lock_spin(&sc->sc_lock);
 1095         /*
 1096          * We can not lock the thread here as it may be sleeping on a
 1097          * different sleepq.  However, holding the sleepq lock for this
 1098          * wchan can guarantee that we do not miss a wakeup for this
 1099          * channel.  The asserts below will catch any false positives.
 1100          */
 1101         if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) {
 1102                 mtx_unlock_spin(&sc->sc_lock);
 1103                 return;
 1104         }
 1105 
 1106         /* Thread is asleep on sleep queue sq, so wake it up. */
 1107         sq = sleepq_lookup(wchan);
 1108         MPASS(sq != NULL);
 1109         MPASS(td->td_wchan == wchan);
              /*
               * The chain lock is not released explicitly on this path; it is
               * left to sleepq_resume_thread(), which is called without
               * SRQ_HOLD.
               */
 1110         wakeup_swapper = sleepq_resume_thread(sq, td, 0, 0);
 1111         if (wakeup_swapper)
 1112                 kick_proc0();
 1113 }
 1114 
 1115 /*
 1116  * Abort a thread as if an interrupt had occurred.  Only abort
 1117  * interruptible waits (unfortunately it isn't safe to abort others).
 1118  *
 1119  * Requires thread lock on entry, releases on return.
       *
       * td must be on a sleep queue with TDF_SINTR set.  intrval must be
       * EINTR or ERESTART, or 0 when TDF_SIGWAIT is set (all asserted); it
       * is stored in td->td_intrval unless a timeout is already flagged.
       *
       * Returns 0 when TDF_TIMEOUT is already set or when the thread is not
       * yet sleeping; otherwise returns the value of sleepq_resume_thread().
 1120  */
 1121 int
 1122 sleepq_abort(struct thread *td, int intrval)
 1123 {
 1124         struct sleepqueue *sq;
 1125         const void *wchan;
 1126 
 1127         THREAD_LOCK_ASSERT(td, MA_OWNED);
 1128         MPASS(TD_ON_SLEEPQ(td));
 1129         MPASS(td->td_flags & TDF_SINTR);
 1130         MPASS((intrval == 0 && (td->td_flags & TDF_SIGWAIT) != 0) ||
 1131             intrval == EINTR || intrval == ERESTART);
 1132 
 1133         /*
 1134          * If the TDF_TIMEOUT flag is set, just leave. A
 1135          * timeout is scheduled anyhow.
 1136          */
 1137         if (td->td_flags & TDF_TIMEOUT) {
 1138                 thread_unlock(td);
 1139                 return (0);
 1140         }
 1141 
 1142         CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)",
 1143             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
 1144         td->td_intrval = intrval;
 1145 
 1146         /*
 1147          * If the thread has not slept yet it will find the signal in
 1148          * sleepq_catch_signals() and call sleepq_resume_thread.  Otherwise
 1149          * we have to do it here.
 1150          */
 1151         if (!TD_IS_SLEEPING(td)) {
 1152                 thread_unlock(td);
 1153                 return (0);
 1154         }
 1155         wchan = td->td_wchan;
 1156         MPASS(wchan != NULL);
 1157         sq = sleepq_lookup(wchan);
 1158         MPASS(sq != NULL);
 1159 
 1160         /* Thread is asleep on sleep queue sq, so wake it up. */
 1161         return (sleepq_resume_thread(sq, td, 0, 0));
 1162 }
 1163 
      /*
       * Resume every sleeping thread, on any wait channel, for which
       * matches() returns true.
       *
       * Each of the SC_TABLESIZE chains is visited.  A chain whose queue
       * list looks empty is skipped without taking its lock; otherwise the
       * chain lock is held while sleepq_remove_matching() is applied to all
       * NR_SLEEPQS queues of every sleepqueue on the chain (priority 0).
       * kick_proc0() is called once at the end if any of those calls
       * returned non-zero.
       */
 1164 void
 1165 sleepq_chains_remove_matching(bool (*matches)(struct thread *))
 1166 {
 1167         struct sleepqueue_chain *sc;
 1168         struct sleepqueue *sq, *sq1;
 1169         int i, wakeup_swapper;
 1170 
 1171         wakeup_swapper = 0;
 1172         for (sc = &sleepq_chains[0]; sc < sleepq_chains + SC_TABLESIZE; ++sc) {
 1173                 if (LIST_EMPTY(&sc->sc_queues)) {
 1174                         continue;
 1175                 }
 1176                 mtx_lock_spin(&sc->sc_lock);
 1177                 LIST_FOREACH_SAFE(sq, &sc->sc_queues, sq_hash, sq1) {
 1178                         for (i = 0; i < NR_SLEEPQS; ++i) {
 1179                                 wakeup_swapper |= sleepq_remove_matching(sq, i,
 1180                                     matches, 0);
 1181                         }
 1182                 }
 1183                 mtx_unlock_spin(&sc->sc_lock);
 1184         }
 1185         if (wakeup_swapper) {
 1186                 kick_proc0();
 1187         }
 1188 }
 1189 
 1190 /*
 1191  * Prints the stacks of all threads presently sleeping on wchan/queue to
 1192  * the sbuf sb.  Sets count_stacks_printed to the number of stacks actually
 1193  * printed.  Typically, this will equal the number of threads sleeping on the
 1194  * queue, but may be less if sb overflowed before all stacks were printed.
       *
       * wchan must be non-NULL and queue must be in [0, NR_SLEEPQS).
       *
       * Returns 0 on success, ENOENT if no sleep queue exists for wchan,
       * ENOMEM if the queue still held more threads than were preallocated
       * after three attempts (10, 100 and 1000 stacks), or the error reported
       * by sbuf_error(sb) after the last stack was written.
       * *count_stacks_printed is 0 whenever nothing was printed.
 1195  */
 1196 #ifdef STACK
 1197 int
 1198 sleepq_sbuf_print_stacks(struct sbuf *sb, const void *wchan, int queue,
 1199     int *count_stacks_printed)
 1200 {
 1201         struct thread *td, *td_next;
 1202         struct sleepqueue *sq;
 1203         struct stack **st;
 1204         struct sbuf **td_infos;
 1205         int i, stack_idx, error, stacks_to_allocate;
 1206         bool finished;
 1207 
 1208         error = 0;
 1209         finished = false;
              /* Give the caller a defined count even if nothing gets printed. */
              *count_stacks_printed = 0;
 1210 
 1211         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
 1212         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
 1213 
 1214         stacks_to_allocate = 10;
 1215         for (i = 0; i < 3 && !finished ; i++) {
 1216                 /* We cannot malloc while holding the queue's spinlock, so
 1217                  * we do our mallocs now, and hope it is enough.  If it
 1218                  * isn't, we will free these, drop the lock, malloc more,
 1219                  * and try again, up to a point.  After that point we will
 1220                  * give up and report ENOMEM. We also cannot write to sb
 1221                  * during this time since the client may have set the
 1222                  * SBUF_AUTOEXTEND flag on their sbuf, which could cause a
 1223                  * malloc as we print to it.  So we defer actually printing
 1224                  * to sb until after we drop the spinlock.
 1225                  */
 1226 
 1227                 /* Where we will store the stacks. */
 1228                 st = malloc(sizeof(struct stack *) * stacks_to_allocate,
 1229                     M_TEMP, M_WAITOK);
 1230                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1231                     stack_idx++)
 1232                         st[stack_idx] = stack_create(M_WAITOK);
 1233 
 1234                 /* Where we will store the td name, tid, etc. */
 1235                 td_infos = malloc(sizeof(struct sbuf *) * stacks_to_allocate,
 1236                     M_TEMP, M_WAITOK);
 1237                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1238                     stack_idx++)
 1239                         td_infos[stack_idx] = sbuf_new(NULL, NULL,
 1240                             MAXCOMLEN + sizeof(struct thread *) * 2 + 40,
 1241                             SBUF_FIXEDLEN);
 1242 
 1243                 sleepq_lock(wchan);
 1244                 sq = sleepq_lookup(wchan);
 1245                 if (sq == NULL) {
 1246                         /* This sleepq does not exist; exit and return ENOENT. */
 1247                         error = ENOENT;
 1248                         finished = true;
 1249                         sleepq_release(wchan);
 1250                         goto loop_end;
 1251                 }
 1252 
 1253                 stack_idx = 0;
 1254                 /* Save thread info */
 1255                 TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq,
 1256                     td_next) {
                              /*
                               * Out of preallocated slots: leave with finished
                               * still false so that loop_end drops the lock
                               * and the outer loop retries with more slots.
                               */
 1257                         if (stack_idx >= stacks_to_allocate)
 1258                                 goto loop_end;
 1259 
 1260                         /* Note the td_lock is equal to the sleepq_lock here. */
 1261                         (void)stack_save_td(st[stack_idx], td);
 1262 
 1263                         sbuf_printf(td_infos[stack_idx], "%d: %s %p",
 1264                             td->td_tid, td->td_name, td);
 1265 
 1266                         ++stack_idx;
 1267                 }
 1268 
 1269                 finished = true;
 1270                 sleepq_release(wchan);
 1271 
 1272                 /*
                       * Print the stacks.  This reuses the outer loop counter
                       * i, which is harmless because finished is already true
                       * and ends the outer loop.
                       */
 1273                 for (i = 0; i < stack_idx; i++) {
 1274                         sbuf_finish(td_infos[i]);
 1275                         sbuf_printf(sb, "--- thread %s: ---\n", sbuf_data(td_infos[i]));
 1276                         stack_sbuf_print(sb, st[i]);
 1277                         sbuf_printf(sb, "\n");
 1278 
                              /*
                               * Count a stack only if sb reports no error after
                               * it was written, so that the result is the
                               * number actually printed rather than the number
                               * captured.
                               */
 1279                         error = sbuf_error(sb);
 1280                         if (error == 0)
 1281                                 *count_stacks_printed = i + 1;
 1282                 }
 1283 
 1284 loop_end:
 1285                 if (!finished)
 1286                         sleepq_release(wchan);
 1287                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1288                     stack_idx++)
 1289                         stack_destroy(st[stack_idx]);
 1290                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1291                     stack_idx++)
 1292                         sbuf_delete(td_infos[stack_idx]);
 1293                 free(st, M_TEMP);
 1294                 free(td_infos, M_TEMP);
 1295                 stacks_to_allocate *= 10;
 1296         }
 1297 
 1298         if (!finished && error == 0)
 1299                 error = ENOMEM;
 1300 
 1301         return (error);
 1302 }
 1303 #endif
 1304 
 1305 #ifdef SLEEPQUEUE_PROFILING
      /*
       * Sleep queue profiling: one counter per distinct wmesg pointer.
       *
       * Records come from the fixed pool sleepq_profent[] of
       * SLEEPQ_PROF_LOCATIONS entries.  A record is either on the free list
       * sleepq_prof_free or on one of the SC_TABLESIZE buckets of
       * sleepq_hash[].  sleepq_prof_lock is a spin mutex taken around updates
       * to these lists.  SLEEPQ_SBUFSIZE is the sbuf length used when the
       * statistics are dumped through sysctl.
       */
 1306 #define SLEEPQ_PROF_LOCATIONS   1024
 1307 #define SLEEPQ_SBUFSIZE         512
 1308 struct sleepq_prof {
 1309         LIST_ENTRY(sleepq_prof) sp_link;
 1310         const char      *sp_wmesg;
 1311         long            sp_count;
 1312 };
 1313 
 1314 LIST_HEAD(sqphead, sleepq_prof);
 1315 
 1316 struct sqphead sleepq_prof_free;
 1317 struct sqphead sleepq_hash[SC_TABLESIZE];
 1318 static struct sleepq_prof sleepq_profent[SLEEPQ_PROF_LOCATIONS];
 1319 static struct mtx sleepq_prof_lock;
 1320 MTX_SYSINIT(sleepq_prof_lock, &sleepq_prof_lock, "sleepq_prof", MTX_SPIN);
 1321 
      /*
       * Account one sleep against wmesg.
       *
       * Records are matched by comparing the wmesg pointer itself, not the
       * string contents.  If no record exists yet, one is taken from the
       * free list and inserted into the bucket selected by SC_HASH(wmesg).
       * Nothing is counted when profiling is disabled or the free list is
       * exhausted.
       */
 1322 static void
 1323 sleepq_profile(const char *wmesg)
 1324 {
 1325         struct sleepq_prof *sp;
 1326 
 1327         mtx_lock_spin(&sleepq_prof_lock);
 1328         if (prof_enabled == 0)
 1329                 goto unlock;
 1330         LIST_FOREACH(sp, &sleepq_hash[SC_HASH(wmesg)], sp_link)
 1331                 if (sp->sp_wmesg == wmesg)
 1332                         goto done;
 1333         sp = LIST_FIRST(&sleepq_prof_free);
 1334         if (sp == NULL)
 1335                 goto unlock;
 1336         sp->sp_wmesg = wmesg;
              /* Move the record from the free list to its hash bucket. */
 1337         LIST_REMOVE(sp, sp_link);
 1338         LIST_INSERT_HEAD(&sleepq_hash[SC_HASH(wmesg)], sp, sp_link);
 1339 done:
 1340         sp->sp_count++;
 1341 unlock:
 1342         mtx_unlock_spin(&sleepq_prof_lock);
 1343         return;
 1344 }
 1345 
      /*
       * Discard all profiling statistics.
       *
       * Under sleepq_prof_lock, every hash bucket and the free list are
       * reinitialized, and every record in sleepq_profent[] is cleared and
       * put back on the free list.  prof_enabled is forced to 0 for the
       * duration and restored to its previous value before the lock is
       * dropped.
       */
 1346 static void
 1347 sleepq_prof_reset(void)
 1348 {
 1349         struct sleepq_prof *sp;
 1350         int enabled;
 1351         int i;
 1352 
 1353         mtx_lock_spin(&sleepq_prof_lock);
 1354         enabled = prof_enabled;
 1355         prof_enabled = 0;
 1356         for (i = 0; i < SC_TABLESIZE; i++)
 1357                 LIST_INIT(&sleepq_hash[i]);
 1358         LIST_INIT(&sleepq_prof_free);
 1359         for (i = 0; i < SLEEPQ_PROF_LOCATIONS; i++) {
 1360                 sp = &sleepq_profent[i];
 1361                 sp->sp_wmesg = NULL;
 1362                 sp->sp_count = 0;
 1363                 LIST_INSERT_HEAD(&sleepq_prof_free, sp, sp_link);
 1364         }
 1365         prof_enabled = enabled;
 1366         mtx_unlock_spin(&sleepq_prof_lock);
 1367 }
 1368 
      /*
       * Sysctl handler that reports and changes prof_enabled.
       *
       * Returns the error from sysctl_handle_int() if it fails.  A request
       * without new data (req->newptr == NULL) and a write of the current
       * value change nothing.  Writing 1 resets the statistics first.  The
       * stored setting is normalized to 0 or 1 and updated under
       * sleepq_prof_lock.
       */
 1369 static int
 1370 enable_sleepq_prof(SYSCTL_HANDLER_ARGS)
 1371 {
 1372         int error, v;
 1373 
 1374         v = prof_enabled;
 1375         error = sysctl_handle_int(oidp, &v, v, req);
 1376         if (error)
 1377                 return (error);
 1378         if (req->newptr == NULL)
 1379                 return (error);
 1380         if (v == prof_enabled)
 1381                 return (0);
 1382         if (v == 1)
 1383                 sleepq_prof_reset();
 1384         mtx_lock_spin(&sleepq_prof_lock);
 1385         prof_enabled = !!v;
 1386         mtx_unlock_spin(&sleepq_prof_lock);
 1387 
 1388         return (0);
 1389 }
 1390 
      /*
       * Sysctl handler that resets the profiling statistics.
       *
       * The value read back is always 0.  Writing a non-zero value calls
       * sleepq_prof_reset(); writing 0, or a request without new data, does
       * nothing.  Returns the error from sysctl_handle_int() if it fails,
       * otherwise 0.
       */
 1391 static int
 1392 reset_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
 1393 {
 1394         int error, v;
 1395 
 1396         v = 0;
 1397         error = sysctl_handle_int(oidp, &v, 0, req);
 1398         if (error)
 1399                 return (error);
 1400         if (req->newptr == NULL)
 1401                 return (error);
 1402         if (v == 0)
 1403                 return (0);
 1404         sleepq_prof_reset();
 1405 
 1406         return (0);
 1407 }
 1408 
      /*
       * Sysctl handler that dumps the profiling table as text.
       *
       * Output is a "wmesg\tcount" header followed by one "wmesg\tcount"
       * line per record found in sleepq_hash[].  Profiling is switched off
       * while the table is walked and the previous setting is restored
       * afterwards; the walk itself runs without sleepq_prof_lock held.
       *
       * Returns the error from sysctl_wire_old_buffer() if it fails,
       * otherwise the result of sbuf_finish().
       */
 1409 static int
 1410 dump_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
 1411 {
 1412         struct sleepq_prof *sp;
 1413         struct sbuf *sb;
 1414         int enabled;
 1415         int error;
 1416         int i;
 1417 
 1418         error = sysctl_wire_old_buffer(req, 0);
 1419         if (error != 0)
 1420                 return (error);
 1421         sb = sbuf_new_for_sysctl(NULL, NULL, SLEEPQ_SBUFSIZE, req);
 1422         sbuf_printf(sb, "\nwmesg\tcount\n");
              /*
               * NOTE(review): prof_enabled is sampled here before
               * sleepq_prof_lock is taken -- confirm that this is intended.
               */
 1423         enabled = prof_enabled;
 1424         mtx_lock_spin(&sleepq_prof_lock);
 1425         prof_enabled = 0;
 1426         mtx_unlock_spin(&sleepq_prof_lock);
 1427         for (i = 0; i < SC_TABLESIZE; i++) {
 1428                 LIST_FOREACH(sp, &sleepq_hash[i], sp_link) {
 1429                         sbuf_printf(sb, "%s\t%ld\n",
 1430                             sp->sp_wmesg, sp->sp_count);
 1431                 }
 1432         }
 1433         mtx_lock_spin(&sleepq_prof_lock);
 1434         prof_enabled = enabled;
 1435         mtx_unlock_spin(&sleepq_prof_lock);
 1436 
 1437         error = sbuf_finish(sb);
 1438         sbuf_delete(sb);
 1439         return (error);
 1440 }
 1441 
      /*
       * Sysctl nodes under _debug_sleepq for the profiling code:
       *   stats  - read-only string, produced by dump_sleepq_prof_stats()
       *   reset  - read/write int, handled by reset_sleepq_prof_stats()
       *   enable - read/write int, handled by enable_sleepq_prof()
       * The #endif below closes the SLEEPQUEUE_PROFILING section.
       */
 1442 SYSCTL_PROC(_debug_sleepq, OID_AUTO, stats,
 1443     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, NULL, 0,
 1444     dump_sleepq_prof_stats, "A",
 1445     "Sleepqueue profiling statistics");
 1446 SYSCTL_PROC(_debug_sleepq, OID_AUTO, reset,
 1447     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
 1448     reset_sleepq_prof_stats, "I",
 1449     "Reset sleepqueue profiling statistics");
 1450 SYSCTL_PROC(_debug_sleepq, OID_AUTO, enable,
 1451     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
 1452     enable_sleepq_prof, "I",
 1453     "Enable sleepqueue profiling");
 1454 #endif
 1455 
 1456 #ifdef DDB
      /*
       * DDB command "show sleepq <addr>" (also reachable as
       * "show sleepqueue").
       *
       * addr is first tried as a wait channel and, failing that, as the
       * address of a struct sleepqueue linked on any chain.  For the queue
       * found, the wait channel, the queue type, the interlock (INVARIANTS
       * only) and the threads blocked on each of the NR_SLEEPQS queues are
       * printed.  Nothing is printed when no address is given.  No locks are
       * acquired in this function.
       */
 1457 DB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
 1458 {
 1459         struct sleepqueue_chain *sc;
 1460         struct sleepqueue *sq;
 1461 #ifdef INVARIANTS
 1462         struct lock_object *lock;
 1463 #endif
 1464         struct thread *td;
 1465         void *wchan;
 1466         int i;
 1467 
 1468         if (!have_addr)
 1469                 return;
 1470 
 1471         /*
 1472          * First, see if there is an active sleep queue for the wait channel
 1473          * indicated by the address.
 1474          */
 1475         wchan = (void *)addr;
 1476         sc = SC_LOOKUP(wchan);
 1477         LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
 1478                 if (sq->sq_wchan == wchan)
 1479                         goto found;
 1480 
 1481         /*
 1482          * Second, see if there is an active sleep queue at the address
 1483          * indicated.
 1484          */
 1485         for (i = 0; i < SC_TABLESIZE; i++)
 1486                 LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) {
 1487                         if (sq == (struct sleepqueue *)addr)
 1488                                 goto found;
 1489                 }
 1490 
 1491         db_printf("Unable to locate a sleep queue via %p\n", (void *)addr);
 1492         return;
 1493 found:
 1494         db_printf("Wait channel: %p\n", sq->sq_wchan);
 1495         db_printf("Queue type: %d\n", sq->sq_type);
 1496 #ifdef INVARIANTS
 1497         if (sq->sq_lock) {
 1498                 lock = sq->sq_lock;
 1499                 db_printf("Associated Interlock: %p - (%s) %s\n", lock,
 1500                     LOCK_CLASS(lock)->lc_name, lock->lo_name);
 1501         }
 1502 #endif
 1503         db_printf("Blocked threads:\n");
 1504         for (i = 0; i < NR_SLEEPQS; i++) {
 1505                 db_printf("\nQueue[%d]:\n", i);
 1506                 if (TAILQ_EMPTY(&sq->sq_blocked[i]))
 1507                         db_printf("\tempty\n");
 1508                 else
 1509                         TAILQ_FOREACH(td, &sq->sq_blocked[i],
 1510                                       td_slpq) {
 1511                                 db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td,
 1512                                           td->td_tid, td->td_proc->p_pid,
 1513                                           td->td_name);
 1514                         }
                      /* The recorded waiter count, to compare with the list above. */
 1515                 db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]);
 1516         }
 1517 }
 1518 
 1519 /* Alias 'show sleepqueue' to 'show sleepq'. */
 1520 DB_SHOW_ALIAS(sleepqueue, db_show_sleepqueue);
 1521 #endif

Cache object: 453a0275719f789858c9ffcd77c36e87


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.