The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_sleepqueue.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 /*
   29  * Implementation of sleep queues used to hold a queue of threads blocked on
   30  * a wait channel.  Sleep queues are different from turnstiles in that wait
   31  * channels are not owned by anyone, so there is no priority propagation.
   32  * Sleep queues can also provide a timeout and can also be interrupted by
   33  * signals.  That said, there are several similarities between the turnstile
   34  * and sleep queue implementations.  (Note: turnstiles were implemented
   35  * first.)  For example, both use a hash table of the same size where each
   36  * bucket is referred to as a "chain" that contains both a spin lock and
   37  * a linked list of queues.  An individual queue is located by using a hash
   38  * to pick a chain, locking the chain, and then walking the chain searching
   39  * for the queue.  This means that a wait channel object does not need to
   40  * embed its queue head just as locks do not embed their turnstile queue
   41  * head.  Threads also carry around a sleep queue that they lend to the
   42  * wait channel when blocking.  Just as in turnstiles, the queue includes
   43  * a free list of the sleep queues of other threads blocked on the same
   44  * wait channel in the case of multiple waiters.
   45  *
   46  * Some additional functionality provided by sleep queues includes the
   47  * ability to set a timeout.  The timeout is managed using a per-thread
   48  * callout that resumes a thread if it is asleep.  A thread may also
   49  * catch signals while it is asleep (aka an interruptible sleep).  The
   50  * signal code uses sleepq_abort() to interrupt a sleeping thread.  Finally,
   51  * sleep queues also provide some extra assertions.  One is not allowed to
   52  * mix the sleep/wakeup and cv APIs for a given wait channel.  Also, one
   53  * must consistently use the same lock to synchronize with a wait channel,
   54  * though this check is currently only a warning for sleep/wakeup due to
   55  * pre-existing abuse of that API.  The same lock must also be held when
   56  * awakening threads, though that is currently only enforced for condition
   57  * variables.
   58  */
   59 
   60 #include <sys/cdefs.h>
   61 __FBSDID("$FreeBSD$");
   62 
   63 #include "opt_sleepqueue_profiling.h"
   64 #include "opt_ddb.h"
   65 #include "opt_sched.h"
   66 #include "opt_stack.h"
   67 
   68 #include <sys/param.h>
   69 #include <sys/systm.h>
   70 #include <sys/lock.h>
   71 #include <sys/kernel.h>
   72 #include <sys/ktr.h>
   73 #include <sys/mutex.h>
   74 #include <sys/proc.h>
   75 #include <sys/sbuf.h>
   76 #include <sys/sched.h>
   77 #include <sys/sdt.h>
   78 #include <sys/signalvar.h>
   79 #include <sys/sleepqueue.h>
   80 #include <sys/stack.h>
   81 #include <sys/sysctl.h>
   82 #include <sys/time.h>
   83 #ifdef EPOCH_TRACE
   84 #include <sys/epoch.h>
   85 #endif
   86 
   87 #include <machine/atomic.h>
   88 
   89 #include <vm/uma.h>
   90 
   91 #ifdef DDB
   92 #include <ddb/ddb.h>
   93 #endif
   94 
   95 /*
   96  * Constants for the hash table of sleep queue chains.
   97  * SC_TABLESIZE must be a power of two for SC_MASK to work properly.
   98  */
   99 #ifndef SC_TABLESIZE
  100 #define SC_TABLESIZE    256
  101 #endif
  102 CTASSERT(powerof2(SC_TABLESIZE));
  103 #define SC_MASK         (SC_TABLESIZE - 1)
  104 #define SC_SHIFT        8
  105 #define SC_HASH(wc)     ((((uintptr_t)(wc) >> SC_SHIFT) ^ (uintptr_t)(wc)) & \
  106                             SC_MASK)
  107 #define SC_LOOKUP(wc)   &sleepq_chains[SC_HASH(wc)]
  108 #define NR_SLEEPQS      2
  109 /*
  110  * There are two different lists of sleep queues.  Both lists are connected
  111  * via the sq_hash entries.  The first list is the sleep queue chain list
  112  * that a sleep queue is on when it is attached to a wait channel.  The
  113  * second list is the free list hung off of a sleep queue that is attached
  114  * to a wait channel.
  115  *
  116  * Each sleep queue also contains the wait channel it is attached to, the
  117  * list of threads blocked on that wait channel, flags specific to the
  118  * wait channel, and the lock used to synchronize with a wait channel.
  119  * The flags are used to catch mismatches between the various consumers
  120  * of the sleep queue API (e.g. sleep/wakeup and condition variables).
  121  * The lock pointer is only used when invariants are enabled for various
  122  * debugging checks.
       *
       * sq_blocked[] and sq_blockedcnt[] are parallel arrays indexed by the
       * sub-queue number (0 .. NR_SLEEPQS - 1) that is passed to sleepq_add();
       * each counter tracks the length of the matching thread list.  sq_type
       * holds the SLEEPQ_TYPE bits of the flags given to sleepq_add() by the
       * first sleeper on the wait channel.
  123  *
  124  * Locking key:
  125  *  c - sleep queue chain lock
  126  */
  127 struct sleepqueue {
  128         struct threadqueue sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */
  129         u_int sq_blockedcnt[NR_SLEEPQS];        /* (c) N. of blocked threads. */
  130         LIST_ENTRY(sleepqueue) sq_hash;         /* (c) Chain and free list. */
  131         LIST_HEAD(, sleepqueue) sq_free;        /* (c) Free queues. */
  132         const void      *sq_wchan;              /* (c) Wait channel. */
  133         int     sq_type;                        /* (c) Queue type. */
  134 #ifdef INVARIANTS
  135         struct lock_object *sq_lock;            /* (c) Associated lock. */
  136 #endif
  137 };
  138 
      /*
       * One bucket ("chain") of the sleep queue hash table.  It holds the list
       * of sleep queues whose wait channels hash to this bucket together with
       * the spin mutex that protects the chain and the queues on it.  When
       * SLEEPQUEUE_PROFILING is defined it also records the current and the
       * largest observed number of queues on the chain.
       *
       * NOTE(review): the cache-line alignment presumably keeps different
       * chain locks from sharing a cache line - confirm.
       */
  139 struct sleepqueue_chain {
  140         LIST_HEAD(, sleepqueue) sc_queues;      /* List of sleep queues. */
  141         struct mtx sc_lock;                     /* Spin lock for this chain. */
  142 #ifdef SLEEPQUEUE_PROFILING
  143         u_int   sc_depth;                       /* Length of sc_queues. */
  144         u_int   sc_max_depth;                   /* Max length of sc_queues. */
  145 #endif
  146 } __aligned(CACHE_LINE_SIZE);
  147 
  148 #ifdef SLEEPQUEUE_PROFILING
  149 u_int sleepq_max_depth;
  150 static SYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  151     "sleepq profiling");
  152 static SYSCTL_NODE(_debug_sleepq, OID_AUTO, chains,
  153     CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  154     "sleepq chain stats");
  155 SYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth,
  156     0, "maxmimum depth achieved of a single chain");
  157 
  158 static void     sleepq_profile(const char *wmesg);
  159 static int      prof_enabled;
  160 #endif
  161 static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
  162 static uma_zone_t sleepq_zone;
  163 
  164 /*
  165  * Prototypes for non-exported routines.
  166  */
  167 static int      sleepq_catch_signals(const void *wchan, int pri);
  168 static inline int sleepq_check_signals(void);
  169 static inline int sleepq_check_timeout(void);
  170 #ifdef INVARIANTS
  171 static void     sleepq_dtor(void *mem, int size, void *arg);
  172 #endif
  173 static int      sleepq_init(void *mem, int size, int flags);
  174 static int      sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
  175                     int pri, int srqflags);
  176 static void     sleepq_remove_thread(struct sleepqueue *sq, struct thread *td);
  177 static void     sleepq_switch(const void *wchan, int pri);
  178 static void     sleepq_timeout(void *arg);
  179 
  180 SDT_PROBE_DECLARE(sched, , , sleep);
  181 SDT_PROBE_DECLARE(sched, , , wakeup);
  182 
  183 /*
  184  * Initialize SLEEPQUEUE_PROFILING specific sysctl nodes.
  185  * Note that it must happen after sleepinit() has been fully executed, so
  186  * it must happen after SI_SUB_KMEM SYSINIT() subsystem setup.
  187  */
  188 #ifdef SLEEPQUEUE_PROFILING
  189 static void
  190 init_sleepqueue_profiling(void)
  191 {
  192         char chain_name[10];
  193         struct sysctl_oid *chain_oid;
  194         u_int i;
  195 
  196         for (i = 0; i < SC_TABLESIZE; i++) {
  197                 snprintf(chain_name, sizeof(chain_name), "%u", i);
  198                 chain_oid = SYSCTL_ADD_NODE(NULL,
  199                     SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO,
  200                     chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
  201                     "sleepq chain stats");
  202                 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  203                     "depth", CTLFLAG_RD, &sleepq_chains[i].sc_depth, 0, NULL);
  204                 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  205                     "max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0,
  206                     NULL);
  207         }
  208 }
  209 
  210 SYSINIT(sleepqueue_profiling, SI_SUB_LOCK, SI_ORDER_ANY,
  211     init_sleepqueue_profiling, NULL);
  212 #endif
  213 
  214 /*
  215  * Early initialization of sleep queues that is called from the sleepinit()
  216  * SYSINIT.
  217  */
  218 void
  219 init_sleepqueues(void)
  220 {
  221         int i;
  222 
  223         for (i = 0; i < SC_TABLESIZE; i++) {
  224                 LIST_INIT(&sleepq_chains[i].sc_queues);
  225                 mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
  226                     MTX_SPIN);
  227         }
  228         sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
  229 #ifdef INVARIANTS
  230             NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
  231 #else
  232             NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
  233 #endif
  234 
  235         thread0.td_sleepqueue = sleepq_alloc();
  236 }
  237 
  238 /*
  239  * Get a sleep queue for a new thread.
  240  */
  241 struct sleepqueue *
  242 sleepq_alloc(void)
  243 {
  244 
  245         return (uma_zalloc(sleepq_zone, M_WAITOK));
  246 }
  247 
  248 /*
  249  * Free a sleep queue when a thread is destroyed.
  250  */
  251 void
  252 sleepq_free(struct sleepqueue *sq)
  253 {
  254 
  255         uma_zfree(sleepq_zone, sq);
  256 }
  257 
  258 /*
  259  * Lock the sleep queue chain associated with the specified wait channel.
  260  */
  261 void
  262 sleepq_lock(const void *wchan)
  263 {
  264         struct sleepqueue_chain *sc;
  265 
  266         sc = SC_LOOKUP(wchan);
  267         mtx_lock_spin(&sc->sc_lock);
  268 }
  269 
  270 /*
  271  * Look up the sleep queue associated with a given wait channel in the hash
  272  * table locking the associated sleep queue chain.  If no queue is found in
  273  * the table, NULL is returned.
  274  */
  275 struct sleepqueue *
  276 sleepq_lookup(const void *wchan)
  277 {
  278         struct sleepqueue_chain *sc;
  279         struct sleepqueue *sq;
  280 
  281         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  282         sc = SC_LOOKUP(wchan);
  283         mtx_assert(&sc->sc_lock, MA_OWNED);
  284         LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
  285                 if (sq->sq_wchan == wchan)
  286                         return (sq);
  287         return (NULL);
  288 }
  289 
  290 /*
  291  * Unlock the sleep queue chain associated with a given wait channel.
  292  */
  293 void
  294 sleepq_release(const void *wchan)
  295 {
  296         struct sleepqueue_chain *sc;
  297 
  298         sc = SC_LOOKUP(wchan);
  299         mtx_unlock_spin(&sc->sc_lock);
  300 }
  301 
  302 /*
  303  * Places the current thread on the sleep queue for the specified wait
  304  * channel.  If INVARIANTS is enabled, then it associates the passed in
  305  * lock with the sleepq to make sure it is held when that sleep queue is
  306  * woken up.
       *
       * The chain lock for 'wchan' must be held by the caller (asserted below).
       *
       * wchan - wait channel to block on; must not be NULL.
       * lock  - lock object associated with the wait channel; it is recorded
       *         in the queue only when INVARIANTS is defined.
       * wmesg - wait message stored in the thread's td_wmesg.
       * flags - SLEEPQ_* flags; the SLEEPQ_TYPE bits become the queue type and
       *         SLEEPQ_INTERRUPTIBLE marks the sleep as interruptible.
       * queue - index of the blocked-thread sub-queue, 0 .. NR_SLEEPQS - 1.
  307  */
  308 void
  309 sleepq_add(const void *wchan, struct lock_object *lock, const char *wmesg,
  310     int flags, int queue)
  311 {
  312         struct sleepqueue_chain *sc;
  313         struct sleepqueue *sq;
  314         struct thread *td;
  315 
  316         td = curthread;
  317         sc = SC_LOOKUP(wchan);
  318         mtx_assert(&sc->sc_lock, MA_OWNED);
  319         MPASS(td->td_sleepqueue != NULL);
  320         MPASS(wchan != NULL);
  321         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  322 
  323         /* If this thread is not allowed to sleep, die a horrible death. */
  324         if (__predict_false(!THREAD_CAN_SLEEP())) {
  325 #ifdef EPOCH_TRACE
  326                 epoch_trace_list(curthread);
  327 #endif
  328                 KASSERT(0,
  329                     ("%s: td %p to sleep on wchan %p with sleeping prohibited",
  330                     __func__, td, wchan));
  331         }
  332 
  333         /* Look up the sleep queue associated with the wait channel 'wchan'. */
  334         sq = sleepq_lookup(wchan);
  335 
  336         /*
  337          * If the wait channel does not already have a sleep queue, use
  338          * this thread's sleep queue.  Otherwise, insert the current thread
  339          * into the sleep queue already in use by this wait channel.
  340          */
  341         if (sq == NULL) {
  342 #ifdef INVARIANTS
  343                 int i;
  344 
                      /*
                       * Sanity-check that the queue being donated is completely
                       * idle before it is attached to the wait channel.
                       */
  345                 sq = td->td_sleepqueue;
  346                 for (i = 0; i < NR_SLEEPQS; i++) {
  347                         KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
  348                             ("thread's sleep queue %d is not empty", i));
  349                         KASSERT(sq->sq_blockedcnt[i] == 0,
  350                             ("thread's sleep queue %d count mismatches", i));
  351                 }
  352                 KASSERT(LIST_EMPTY(&sq->sq_free),
  353                     ("thread's sleep queue has a non-empty free list"));
  354                 KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
  355                 sq->sq_lock = lock;
  356 #endif
  357 #ifdef SLEEPQUEUE_PROFILING
                      /* One more queue on this chain; track the high-water marks. */
  358                 sc->sc_depth++;
  359                 if (sc->sc_depth > sc->sc_max_depth) {
  360                         sc->sc_max_depth = sc->sc_depth;
  361                         if (sc->sc_max_depth > sleepq_max_depth)
  362                                 sleepq_max_depth = sc->sc_max_depth;
  363                 }
  364 #endif
                      /* Attach the thread's own queue to the chain for 'wchan'. */
  365                 sq = td->td_sleepqueue;
  366                 LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
  367                 sq->sq_wchan = wchan;
  368                 sq->sq_type = flags & SLEEPQ_TYPE;
  369         } else {
                      /*
                       * The wait channel already has a queue: check that this
                       * sleeper agrees with it and park the thread's spare queue
                       * on the free list.
                       */
  370                 MPASS(wchan == sq->sq_wchan);
  371                 MPASS(lock == sq->sq_lock);
  372                 MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
  373                 LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
  374         }
              /*
               * Under the thread lock, link the thread onto the requested
               * sub-queue and record the sleep state in the thread.  The
               * thread no longer owns a sleep queue, so td_sleepqueue is
               * cleared.
               */
  375         thread_lock(td);
  376         TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
  377         sq->sq_blockedcnt[queue]++;
  378         td->td_sleepqueue = NULL;
  379         td->td_sqqueue = queue;
  380         td->td_wchan = wchan;
  381         td->td_wmesg = wmesg;
  382         if (flags & SLEEPQ_INTERRUPTIBLE) {
  383                 td->td_intrval = 0;
  384                 td->td_flags |= TDF_SINTR;
  385         }
  386         td->td_flags &= ~TDF_TIMEOUT;
  387         thread_unlock(td);
  388 }
  389 
  390 /*
  391  * Sets a timeout that will remove the current thread from the specified
  392  * sleep queue after timo ticks if the thread has not already been awakened.
  393  */
  394 void
  395 sleepq_set_timeout_sbt(const void *wchan, sbintime_t sbt, sbintime_t pr,
  396     int flags)
  397 {
  398         struct sleepqueue_chain *sc __unused;
  399         struct thread *td;
  400         sbintime_t pr1;
  401 
  402         td = curthread;
  403         sc = SC_LOOKUP(wchan);
  404         mtx_assert(&sc->sc_lock, MA_OWNED);
  405         MPASS(TD_ON_SLEEPQ(td));
  406         MPASS(td->td_sleepqueue == NULL);
  407         MPASS(wchan != NULL);
  408         if (cold && td == &thread0)
  409                 panic("timed sleep before timers are working");
  410         KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx",
  411             td->td_tid, td, (uintmax_t)td->td_sleeptimo));
  412         thread_lock(td);
  413         callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1);
  414         thread_unlock(td);
  415         callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1,
  416             sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC |
  417             C_DIRECT_EXEC);
  418 }
  419 
  420 /*
  421  * Return the number of actual sleepers for the specified queue.
  422  */
  423 u_int
  424 sleepq_sleepcnt(const void *wchan, int queue)
  425 {
  426         struct sleepqueue *sq;
  427 
  428         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  429         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  430         sq = sleepq_lookup(wchan);
  431         if (sq == NULL)
  432                 return (0);
  433         return (sq->sq_blockedcnt[queue]);
  434 }
  435 
      /*
       * Checks whether thread 'td' has a pending wakeup request, suspension
       * request or signal that should keep it from going to sleep.
       *
       * Must be called with the chain lock 'sc' held (asserted).  Every return
       * path leaves both the chain lock and the thread lock held.  When
       * TDF_NEEDSIGCHK or TDF_NEEDSUSPCHK is set, both locks are dropped and
       * re-acquired around the proc-locked checks.
       *
       * Returns EINTR if TDP_WAKEUP was set (the flag is consumed), 0 if
       * neither TDF_NEEDSIGCHK nor TDF_NEEDSUSPCHK is set, otherwise the
       * non-zero result of sig_ast_checksusp() or, failing that, whatever
       * sig_ast_needsigchk() returns.
       */
  436 static int
  437 sleepq_check_ast_sc_locked(struct thread *td, struct sleepqueue_chain *sc)
  438 {
  439         struct proc *p;
  440         int ret;
  441 
  442         mtx_assert(&sc->sc_lock, MA_OWNED);
  443 
              /* A pending wakeup request is consumed and reported as EINTR. */
  444         if ((td->td_pflags & TDP_WAKEUP) != 0) {
  445                 td->td_pflags &= ~TDP_WAKEUP;
  446                 thread_lock(td);
  447                 return (EINTR);
  448         }
  449 
  450         /*
  451          * See if there are any pending signals or suspension requests for this
  452          * thread.  If not, we can switch immediately.
  453          */
  454         thread_lock(td);
  455         if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) == 0)
  456                 return (0);
  457 
              /*
               * Something is pending.  Release the thread lock and the chain
               * lock before taking the proc lock.
               * NOTE(review): presumably because PROC_LOCK must not be acquired
               * while spin locks are held - confirm.
               */
  458         thread_unlock(td);
  459         mtx_unlock_spin(&sc->sc_lock);
  460 
  461         p = td->td_proc;
  462         CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)",
  463             (void *)td, (long)p->p_pid, td->td_name);
  464         PROC_LOCK(p);
  465 
  466         /*
  467          * Check for suspension first. Checking for signals and then
  468          * suspending could result in a missed signal, since a signal
  469          * can be delivered while this thread is suspended.
  470          */
  471         ret = sig_ast_checksusp(td);
  472         if (ret != 0) {
                      /* Restore the lock state expected by the caller. */
  473                 PROC_UNLOCK(p);
  474                 mtx_lock_spin(&sc->sc_lock);
  475                 thread_lock(td);
  476                 return (ret);
  477         }
  478 
  479         ret = sig_ast_needsigchk(td);
  480 
  481         /*
  482          * Lock the per-process spinlock prior to dropping the
  483          * PROC_LOCK to avoid a signal delivery race.
  484          * PROC_LOCK, PROC_SLOCK, and thread_lock() are
  485          * currently held in tdsendsignal().
  486          */
  487         PROC_SLOCK(p);
  488         mtx_lock_spin(&sc->sc_lock);
  489         PROC_UNLOCK(p);
  490         thread_lock(td);
  491         PROC_SUNLOCK(p);
  492 
  493         return (ret);
  494 }
  495 
  496 /*
  497  * Marks the pending sleep of the current thread as interruptible and
  498  * makes an initial check for pending signals before putting a thread
  499  * to sleep. Enters and exits with the thread lock held.  Thread lock
  500  * may have transitioned from the sleepq lock to a run lock.
  501  */
  502 static int
  503 sleepq_catch_signals(const void *wchan, int pri)
  504 {
  505         struct thread *td;
  506         struct sleepqueue_chain *sc;
  507         struct sleepqueue *sq;
  508         int ret;
  509 
  510         sc = SC_LOOKUP(wchan);
  511         mtx_assert(&sc->sc_lock, MA_OWNED);
  512         MPASS(wchan != NULL);
  513         td = curthread;
  514 
  515         ret = sleepq_check_ast_sc_locked(td, sc);
  516         THREAD_LOCK_ASSERT(td, MA_OWNED);
  517         mtx_assert(&sc->sc_lock, MA_OWNED);
  518 
  519         if (ret == 0) {
  520                 /*
  521                  * No pending signals and no suspension requests found.
  522                  * Switch the thread off the cpu.
  523                  */
  524                 sleepq_switch(wchan, pri);
  525         } else {
  526                 /*
  527                  * There were pending signals and this thread is still
  528                  * on the sleep queue, remove it from the sleep queue.
  529                  */
  530                 if (TD_ON_SLEEPQ(td)) {
  531                         sq = sleepq_lookup(wchan);
  532                         sleepq_remove_thread(sq, td);
  533                 }
  534                 MPASS(td->td_lock != &sc->sc_lock);
  535                 mtx_unlock_spin(&sc->sc_lock);
  536                 thread_unlock(td);
  537         }
  538         return (ret);
  539 }
  540 
  541 /*
  542  * Switches to another thread if we are still asleep on a sleep queue.
  543  * Returns with thread lock.
       *
       * Must be entered with both the chain lock for 'wchan' and the current
       * thread's lock held (asserted).  'pri' is handed to sched_sleep().
       *
       * NOTE(review): the two early-return paths below release both the chain
       * lock and the thread lock, so "Returns with thread lock" looks stale;
       * the lock state after mi_switch() is not visible here - confirm.
  544  */
  545 static void
  546 sleepq_switch(const void *wchan, int pri)
  547 {
  548         struct sleepqueue_chain *sc;
  549         struct sleepqueue *sq;
  550         struct thread *td;
  551         bool rtc_changed;
  552 
  553         td = curthread;
  554         sc = SC_LOOKUP(wchan);
  555         mtx_assert(&sc->sc_lock, MA_OWNED);
  556         THREAD_LOCK_ASSERT(td, MA_OWNED);
  557 
  558         /*
  559          * If we have a sleep queue, then we've already been woken up, so
  560          * just return.
  561          */
  562         if (td->td_sleepqueue != NULL) {
  563                 mtx_unlock_spin(&sc->sc_lock);
  564                 thread_unlock(td);
  565                 return;
  566         }
  567 
  568         /*
  569          * If TDF_TIMEOUT is set, then our sleep has been timed out
  570          * already but we are still on the sleep queue, so dequeue the
  571          * thread and return.
  572          *
  573          * Do the same if the real-time clock has been adjusted since this
  574          * thread calculated its timeout based on that clock.  This handles
  575          * the following race:
  576          * - The Ts thread needs to sleep until an absolute real-clock time.
  577          *   It copies the global rtc_generation into curthread->td_rtcgen,
  578          *   reads the RTC, and calculates a sleep duration based on that time.
  579          *   See umtxq_sleep() for an example.
  580          * - The Tc thread adjusts the RTC, bumps rtc_generation, and wakes
  581          *   threads that are sleeping until an absolute real-clock time.
  582          *   See tc_setclock() and the POSIX specification of clock_settime().
  583          * - Ts reaches the code below.  It holds the sleepqueue chain lock,
  584          *   so Tc has finished waking, so this thread must test td_rtcgen.
  585          * (The declaration of td_rtcgen refers to this comment.)
  586          */
  587         rtc_changed = td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation;
  588         if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) {
  589                 if (rtc_changed) {
  590                         td->td_rtcgen = 0;
  591                 }
  592                 MPASS(TD_ON_SLEEPQ(td));
  593                 sq = sleepq_lookup(wchan);
  594                 sleepq_remove_thread(sq, td);
  595                 mtx_unlock_spin(&sc->sc_lock);
  596                 thread_unlock(td);
  597                 return;
  598         }
  599 #ifdef SLEEPQUEUE_PROFILING
  600         if (prof_enabled)
  601                 sleepq_profile(td->td_wmesg);
  602 #endif
              /*
               * Really go to sleep: notify the scheduler, make the chain lock
               * the thread's lock, mark the thread as sleeping and switch away.
               * Execution continues after mi_switch() once the thread has been
               * resumed.
               */
  603         MPASS(td->td_sleepqueue == NULL);
  604         sched_sleep(td, pri);
  605         thread_lock_set(td, &sc->sc_lock);
  606         SDT_PROBE0(sched, , , sleep);
  607         TD_SET_SLEEPING(td);
  608         mi_switch(SW_VOL | SWT_SLEEPQ);
  609         KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
  610         CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
  611             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
  612 }
  613 
  614 /*
  615  * Check to see if we timed out.
  616  */
  617 static inline int
  618 sleepq_check_timeout(void)
  619 {
  620         struct thread *td;
  621         int res;
  622 
  623         res = 0;
  624         td = curthread;
  625         if (td->td_sleeptimo != 0) {
  626                 if (td->td_sleeptimo <= sbinuptime())
  627                         res = EWOULDBLOCK;
  628                 td->td_sleeptimo = 0;
  629         }
  630         return (res);
  631 }
  632 
  633 /*
  634  * Check to see if we were awoken by a signal.
  635  */
  636 static inline int
  637 sleepq_check_signals(void)
  638 {
  639         struct thread *td;
  640 
  641         td = curthread;
  642         KASSERT((td->td_flags & TDF_SINTR) == 0,
  643             ("thread %p still in interruptible sleep?", td));
  644 
  645         return (td->td_intrval);
  646 }
  647 
  648 /*
  649  * Block the current thread until it is awakened from its sleep queue.
  650  */
  651 void
  652 sleepq_wait(const void *wchan, int pri)
  653 {
  654         struct thread *td;
  655 
  656         td = curthread;
  657         MPASS(!(td->td_flags & TDF_SINTR));
  658         thread_lock(td);
  659         sleepq_switch(wchan, pri);
  660 }
  661 
  662 /*
  663  * Block the current thread until it is awakened from its sleep queue
  664  * or it is interrupted by a signal.
  665  */
  666 int
  667 sleepq_wait_sig(const void *wchan, int pri)
  668 {
  669         int rcatch;
  670 
  671         rcatch = sleepq_catch_signals(wchan, pri);
  672         if (rcatch)
  673                 return (rcatch);
  674         return (sleepq_check_signals());
  675 }
  676 
  677 /*
  678  * Block the current thread until it is awakened from its sleep queue
  679  * or it times out while waiting.
  680  */
  681 int
  682 sleepq_timedwait(const void *wchan, int pri)
  683 {
  684         struct thread *td;
  685 
  686         td = curthread;
  687         MPASS(!(td->td_flags & TDF_SINTR));
  688 
  689         thread_lock(td);
  690         sleepq_switch(wchan, pri);
  691 
  692         return (sleepq_check_timeout());
  693 }
  694 
  695 /*
  696  * Block the current thread until it is awakened from its sleep queue,
  697  * it is interrupted by a signal, or it times out waiting to be awakened.
  698  */
  699 int
  700 sleepq_timedwait_sig(const void *wchan, int pri)
  701 {
  702         int rcatch, rvalt, rvals;
  703 
  704         rcatch = sleepq_catch_signals(wchan, pri);
  705         /* We must always call check_timeout() to clear sleeptimo. */
  706         rvalt = sleepq_check_timeout();
  707         rvals = sleepq_check_signals();
  708         if (rcatch)
  709                 return (rcatch);
  710         if (rvals)
  711                 return (rvals);
  712         return (rvalt);
  713 }
  714 
  715 /*
  716  * Returns the type of sleepqueue given a waitchannel.
  717  */
  718 int
  719 sleepq_type(const void *wchan)
  720 {
  721         struct sleepqueue *sq;
  722         int type;
  723 
  724         MPASS(wchan != NULL);
  725 
  726         sq = sleepq_lookup(wchan);
  727         if (sq == NULL)
  728                 return (-1);
  729         type = sq->sq_type;
  730 
  731         return (type);
  732 }
  733 
  734 /*
  735  * Removes a thread from a sleep queue and makes it
  736  * runnable.
  737  *
  738  * Requires the sc chain locked on entry.  If SRQ_HOLD is specified it will
  739  * be locked on return.  Returns without the thread lock held.
       *
       * sq       - sleep queue the thread is blocked on; td_wchan must match
       *            the queue's wait channel (asserted).
       * td       - thread to resume.
       * pri      - 0 to leave the priority alone, otherwise a priority in
       *            PRI_MIN .. PRI_MAX that is applied with sched_prio() when it
       *            is numerically lower than the thread's current priority and
       *            the thread's base class is PRI_TIMESHARE.
       * srqflags - SRQ_* flags; passed on to setrunnable().
       *
       * Returns the result of setrunnable() if the thread was sleeping, or 0
       * if it was not.
  740  */
  741 static int
  742 sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri,
  743     int srqflags)
  744 {
  745         struct sleepqueue_chain *sc;
  746         bool drop;
  747 
  748         MPASS(td != NULL);
  749         MPASS(sq->sq_wchan != NULL);
  750         MPASS(td->td_wchan == sq->sq_wchan);
  751 
  752         sc = SC_LOOKUP(sq->sq_wchan);
  753         mtx_assert(&sc->sc_lock, MA_OWNED);
  754 
  755         /*
  756          * Avoid recursing on the chain lock.  If the locks don't match we
  757          * need to acquire the thread lock which setrunnable will drop for
  758          * us.  In this case we need to drop the chain lock afterwards.
  759          *
  760          * There is no race that will make td_lock equal to sc_lock because
  761          * we hold sc_lock.
  762          */
  763         drop = false;
  764         if (!TD_IS_SLEEPING(td)) {
  765                 thread_lock(td);
  766                 drop = true;
  767         } else
  768                 thread_lock_block_wait(td);
  769 
  770         /* Remove thread from the sleepq. */
  771         sleepq_remove_thread(sq, td);
  772 
  773         /* If we're done with the sleepqueue release it. */
  774         if ((srqflags & SRQ_HOLD) == 0 && drop)
  775                 mtx_unlock_spin(&sc->sc_lock);
  776 
  777         /* Adjust priority if requested. */
  778         MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
  779         if (pri != 0 && td->td_priority > pri &&
  780             PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
  781                 sched_prio(td, pri);
  782 
  783         /*
  784          * Note that thread td might not be sleeping if it is running
  785          * sleepq_catch_signals() on another CPU or is blocked on its
  786          * proc lock to check signals.  There's no need to mark the
  787          * thread runnable in that case.
  788          */
  789         if (TD_IS_SLEEPING(td)) {
  790                 MPASS(!drop);
  791                 TD_CLR_SLEEPING(td);
  792                 return (setrunnable(td, srqflags));
  793         }
              /* Not sleeping: the thread lock was taken above, release it. */
  794         MPASS(drop);
  795         thread_unlock(td);
  796 
  797         return (0);
  798 }
  799 
  800 static void
  801 sleepq_remove_thread(struct sleepqueue *sq, struct thread *td)
  802 {
              /*
               * Unlink td from sleep queue sq and reset the thread's sleep
               * bookkeeping.  As asserted below, the caller must hold both the
               * thread lock and the chain lock for sq's wait channel; this
               * function releases neither and does not make td runnable.
               */
  803         struct sleepqueue_chain *sc __unused;
  804 
  805         MPASS(td != NULL);
  806         MPASS(sq->sq_wchan != NULL);
  807         MPASS(td->td_wchan == sq->sq_wchan);
  808         MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0);
  809         THREAD_LOCK_ASSERT(td, MA_OWNED);
  810         sc = SC_LOOKUP(sq->sq_wchan);
  811         mtx_assert(&sc->sc_lock, MA_OWNED);
  812 
              /* Fire the sched "wakeup" probe for td and its process. */
  813         SDT_PROBE2(sched, , , wakeup, td, td->td_proc);
  814 
  815         /* Remove the thread from the queue. */
  816         sq->sq_blockedcnt[td->td_sqqueue]--;
  817         TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
  818 
  819         /*
  820          * Get a sleep queue for this thread.  If this is the last waiter,
  821          * use the queue itself and take it out of the chain, otherwise,
  822          * remove a queue from the free list.
  823          */
  824         if (LIST_EMPTY(&sq->sq_free)) {
  825                 td->td_sleepqueue = sq;
  826 #ifdef INVARIANTS
  827                 sq->sq_wchan = NULL;
  828 #endif
  829 #ifdef SLEEPQUEUE_PROFILING
  830                 sc->sc_depth--;
  831 #endif
  832         } else
  833                 td->td_sleepqueue = LIST_FIRST(&sq->sq_free);
              /*
               * Unlink the queue just handed to td from the list it is on;
               * sq_hash is the linkage used in both of the cases above.
               */
  834         LIST_REMOVE(td->td_sleepqueue, sq_hash);
  835 
              /*
               * Stop the sleep callout only when a timeout is set
               * (td_sleeptimo != 0) and TDF_TIMEOUT has not been flagged.
               */
  836         if ((td->td_flags & TDF_TIMEOUT) == 0 && td->td_sleeptimo != 0)
  837                 /*
  838                  * We ignore the situation where timeout subsystem was
  839                  * unable to stop our callout.  The struct thread is
  840                  * type-stable, the callout will use the correct
  841                  * memory when running.  The checks of the
  842                  * td_sleeptimo value in this function and in
  843                  * sleepq_timeout() ensure that the thread does not
  844                  * get spurious wakeups, even if the callout was reset
  845                  * or thread reused.
  846                  */
  847                 callout_stop(&td->td_slpcallout);
  848 
              /* Clear the wait channel state and the per-sleep flags. */
  849         td->td_wmesg = NULL;
  850         td->td_wchan = NULL;
  851         td->td_flags &= ~(TDF_SINTR | TDF_TIMEOUT);
  852 
              /* Note: the trace label is "sleepq_wakeup", not this function's name. */
  853         CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)",
  854             (void *)td, (long)td->td_proc->p_pid, td->td_name);
  855 }
  856 
  857 void
  858 sleepq_remove_nested(struct thread *td)
  859 {
  860         struct sleepqueue_chain *sc;
  861         struct sleepqueue *sq;
  862         const void *wchan;
  863 
  864         MPASS(TD_ON_SLEEPQ(td));
  865 
  866         wchan = td->td_wchan;
  867         sc = SC_LOOKUP(wchan);
  868         mtx_lock_spin(&sc->sc_lock);
  869         sq = sleepq_lookup(wchan);
  870         MPASS(sq != NULL);
  871         thread_lock(td);
  872         sleepq_remove_thread(sq, td);
  873         mtx_unlock_spin(&sc->sc_lock);
  874         /* Returns with the thread lock owned. */
  875 }
  876 
  877 #ifdef INVARIANTS
  878 /*
  879  * UMA zone item deallocator.
  880  */
  881 static void
  882 sleepq_dtor(void *mem, int size, void *arg)
  883 {
  884         struct sleepqueue *sq;
  885         int i;
  886 
  887         sq = mem;
  888         for (i = 0; i < NR_SLEEPQS; i++) {
  889                 MPASS(TAILQ_EMPTY(&sq->sq_blocked[i]));
  890                 MPASS(sq->sq_blockedcnt[i] == 0);
  891         }
  892 }
  893 #endif
  894 
  895 /*
  896  * UMA zone item initializer.
  897  */
  898 static int
  899 sleepq_init(void *mem, int size, int flags)
  900 {
  901         struct sleepqueue *sq;
  902         int i;
  903 
  904         bzero(mem, size);
  905         sq = mem;
  906         for (i = 0; i < NR_SLEEPQS; i++) {
  907                 TAILQ_INIT(&sq->sq_blocked[i]);
  908                 sq->sq_blockedcnt[i] = 0;
  909         }
  910         LIST_INIT(&sq->sq_free);
  911         return (0);
  912 }
  913 
  914 /*
  915  * Find thread sleeping on a wait channel and resume it.
  916  */
  917 int
  918 sleepq_signal(const void *wchan, int flags, int pri, int queue)
  919 {
  920         struct sleepqueue_chain *sc;
  921         struct sleepqueue *sq;
  922         struct threadqueue *head;
  923         struct thread *td, *besttd;
  924         int wakeup_swapper;
  925 
  926         CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
  927         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  928         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  929         sq = sleepq_lookup(wchan);
  930         if (sq == NULL) {
  931                 if (flags & SLEEPQ_DROP)
  932                         sleepq_release(wchan);
  933                 return (0);
  934         }
  935         KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
  936             ("%s: mismatch between sleep/wakeup and cv_*", __func__));
  937 
  938         head = &sq->sq_blocked[queue];
  939         if (flags & SLEEPQ_UNFAIR) {
  940                 /*
  941                  * Find the most recently sleeping thread, but try to
  942                  * skip threads still in process of context switch to
  943                  * avoid spinning on the thread lock.
  944                  */
  945                 sc = SC_LOOKUP(wchan);
  946                 besttd = TAILQ_LAST_FAST(head, thread, td_slpq);
  947                 while (besttd->td_lock != &sc->sc_lock) {
  948                         td = TAILQ_PREV_FAST(besttd, head, thread, td_slpq);
  949                         if (td == NULL)
  950                                 break;
  951                         besttd = td;
  952                 }
  953         } else {
  954                 /*
  955                  * Find the highest priority thread on the queue.  If there
  956                  * is a tie, use the thread that first appears in the queue
  957                  * as it has been sleeping the longest since threads are
  958                  * always added to the tail of sleep queues.
  959                  */
  960                 besttd = td = TAILQ_FIRST(head);
  961                 while ((td = TAILQ_NEXT(td, td_slpq)) != NULL) {
  962                         if (td->td_priority < besttd->td_priority)
  963                                 besttd = td;
  964                 }
  965         }
  966         MPASS(besttd != NULL);
  967         wakeup_swapper = sleepq_resume_thread(sq, besttd, pri,
  968             (flags & SLEEPQ_DROP) ? 0 : SRQ_HOLD);
  969         return (wakeup_swapper);
  970 }
  971 
  972 static bool
  973 match_any(struct thread *td __unused)
  974 {
  975 
              /*
               * Predicate that accepts every thread; td is ignored.  Passed to
               * sleepq_remove_matching() by sleepq_broadcast().
               */
  976         return (true);
  977 }
  978 
  979 /*
  980  * Resume all threads sleeping on a specified wait channel.
  981  */
  982 int
  983 sleepq_broadcast(const void *wchan, int flags, int pri, int queue)
  984 {
  985         struct sleepqueue *sq;
  986 
  987         CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
  988         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  989         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  990         sq = sleepq_lookup(wchan);
  991         if (sq == NULL)
  992                 return (0);
  993         KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
  994             ("%s: mismatch between sleep/wakeup and cv_*", __func__));
  995 
  996         return (sleepq_remove_matching(sq, queue, match_any, pri));
  997 }
  998 
  999 /*
 1000  * Resume threads on the sleep queue that match the given predicate.
 1001  */
 1002 int
 1003 sleepq_remove_matching(struct sleepqueue *sq, int queue,
 1004     bool (*matches)(struct thread *), int pri)
 1005 {
 1006         struct thread *td, *tdn;
 1007         int wakeup_swapper;
 1008 
 1009         /*
 1010          * The last thread will be given ownership of sq and may
 1011          * re-enqueue itself before sleepq_resume_thread() returns,
 1012          * so we must cache the "next" queue item at the beginning
 1013          * of the final iteration.
 1014          */
 1015         wakeup_swapper = 0;
 1016         TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
 1017                 if (matches(td))
 1018                         wakeup_swapper |= sleepq_resume_thread(sq, td, pri,
 1019                             SRQ_HOLD);
 1020         }
 1021 
 1022         return (wakeup_swapper);
 1023 }
 1024 
 1025 /*
 1026  * Time sleeping threads out.  When the timeout expires, the thread is
 1027  * removed from the sleep queue and made runnable if it is still asleep.
 1028  */
 1029 static void
 1030 sleepq_timeout(void *arg)
 1031 {
 1032         struct sleepqueue_chain *sc __unused;
 1033         struct sleepqueue *sq;
 1034         struct thread *td;
 1035         const void *wchan;
 1036         int wakeup_swapper;
 1037 
 1038         td = arg;
 1039         CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
 1040             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
 1041 
 1042         thread_lock(td);
 1043         if (td->td_sleeptimo == 0 ||
 1044             td->td_sleeptimo > td->td_slpcallout.c_time) {
 1045                 /*
 1046                  * The thread does not want a timeout (yet).
 1047                  */
 1048         } else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
 1049                 /*
 1050                  * See if the thread is asleep and get the wait
 1051                  * channel if it is.
 1052                  */
 1053                 wchan = td->td_wchan;
 1054                 sc = SC_LOOKUP(wchan);
 1055                 THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
 1056                 sq = sleepq_lookup(wchan);
 1057                 MPASS(sq != NULL);
 1058                 td->td_flags |= TDF_TIMEOUT;
 1059                 wakeup_swapper = sleepq_resume_thread(sq, td, 0, 0);
 1060                 if (wakeup_swapper)
 1061                         kick_proc0();
 1062                 return;
 1063         } else if (TD_ON_SLEEPQ(td)) {
 1064                 /*
 1065                  * If the thread is on the SLEEPQ but isn't sleeping
 1066                  * yet, it can either be on another CPU in between
 1067                  * sleepq_add() and one of the sleepq_*wait*()
 1068                  * routines or it can be in sleepq_catch_signals().
 1069                  */
 1070                 td->td_flags |= TDF_TIMEOUT;
 1071         }
 1072         thread_unlock(td);
 1073 }
 1074 
 1075 /*
 1076  * Resumes a specific thread from the sleep queue associated with a specific
 1077  * wait channel if it is on that queue.
 1078  */
 1079 void
 1080 sleepq_remove(struct thread *td, const void *wchan)
 1081 {
 1082         struct sleepqueue_chain *sc;
 1083         struct sleepqueue *sq;
 1084         int wakeup_swapper;
 1085 
 1086         /*
 1087          * Look up the sleep queue for this wait channel, then re-check
 1088          * that the thread is asleep on that channel, if it is not, then
 1089          * bail.
 1090          */
 1091         MPASS(wchan != NULL);
 1092         sc = SC_LOOKUP(wchan);
 1093         mtx_lock_spin(&sc->sc_lock);
 1094         /*
 1095          * We can not lock the thread here as it may be sleeping on a
 1096          * different sleepq.  However, holding the sleepq lock for this
 1097          * wchan can guarantee that we do not miss a wakeup for this
 1098          * channel.  The asserts below will catch any false positives.
 1099          */
 1100         if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) {
 1101                 mtx_unlock_spin(&sc->sc_lock);
 1102                 return;
 1103         }
 1104 
 1105         /* Thread is asleep on sleep queue sq, so wake it up. */
 1106         sq = sleepq_lookup(wchan);
 1107         MPASS(sq != NULL);
 1108         MPASS(td->td_wchan == wchan);
 1109         wakeup_swapper = sleepq_resume_thread(sq, td, 0, 0);
 1110         if (wakeup_swapper)
 1111                 kick_proc0();
 1112 }
 1113 
 1114 /*
 1115  * Abort a thread as if an interrupt had occurred.  Only abort
 1116  * interruptible waits (unfortunately it isn't safe to abort others).
 1117  *
 1118  * Requires thread lock on entry, releases on return.
 1119  */
 1120 int
 1121 sleepq_abort(struct thread *td, int intrval)
 1122 {
 1123         struct sleepqueue *sq;
 1124         const void *wchan;
 1125 
 1126         THREAD_LOCK_ASSERT(td, MA_OWNED);
 1127         MPASS(TD_ON_SLEEPQ(td));
 1128         MPASS(td->td_flags & TDF_SINTR);
 1129         MPASS((intrval == 0 && (td->td_flags & TDF_SIGWAIT) != 0) ||
 1130             intrval == EINTR || intrval == ERESTART);
 1131 
 1132         /*
 1133          * If the TDF_TIMEOUT flag is set, just leave. A
 1134          * timeout is scheduled anyhow.
 1135          */
 1136         if (td->td_flags & TDF_TIMEOUT) {
 1137                 thread_unlock(td);
 1138                 return (0);
 1139         }
 1140 
 1141         CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)",
 1142             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
 1143         td->td_intrval = intrval;
 1144 
 1145         /*
 1146          * If the thread has not slept yet it will find the signal in
 1147          * sleepq_catch_signals() and call sleepq_resume_thread.  Otherwise
 1148          * we have to do it here.
 1149          */
 1150         if (!TD_IS_SLEEPING(td)) {
 1151                 thread_unlock(td);
 1152                 return (0);
 1153         }
 1154         wchan = td->td_wchan;
 1155         MPASS(wchan != NULL);
 1156         sq = sleepq_lookup(wchan);
 1157         MPASS(sq != NULL);
 1158 
 1159         /* Thread is asleep on sleep queue sq, so wake it up. */
 1160         return (sleepq_resume_thread(sq, td, 0, 0));
 1161 }
 1162 
 1163 void
 1164 sleepq_chains_remove_matching(bool (*matches)(struct thread *))
 1165 {
 1166         struct sleepqueue_chain *sc;
 1167         struct sleepqueue *sq, *sq1;
 1168         int i, wakeup_swapper;
 1169 
 1170         wakeup_swapper = 0;
 1171         for (sc = &sleepq_chains[0]; sc < sleepq_chains + SC_TABLESIZE; ++sc) {
 1172                 if (LIST_EMPTY(&sc->sc_queues)) {
 1173                         continue;
 1174                 }
 1175                 mtx_lock_spin(&sc->sc_lock);
 1176                 LIST_FOREACH_SAFE(sq, &sc->sc_queues, sq_hash, sq1) {
 1177                         for (i = 0; i < NR_SLEEPQS; ++i) {
 1178                                 wakeup_swapper |= sleepq_remove_matching(sq, i,
 1179                                     matches, 0);
 1180                         }
 1181                 }
 1182                 mtx_unlock_spin(&sc->sc_lock);
 1183         }
 1184         if (wakeup_swapper) {
 1185                 kick_proc0();
 1186         }
 1187 }
 1188 
 1189 /*
 1190  * Prints the stacks of all threads presently sleeping on wchan/queue to
 1191  * the sbuf sb.  Sets count_stacks_printed to the number of stacks actually
 1192  * printed.  Typically, this will equal the number of threads sleeping on the
 1193  * queue, but may be less if sb overflowed before all stacks were printed.
 1194  */
 1195 #ifdef STACK
 1196 int
 1197 sleepq_sbuf_print_stacks(struct sbuf *sb, const void *wchan, int queue,
 1198     int *count_stacks_printed)
 1199 {
 1200         struct thread *td, *td_next;
 1201         struct sleepqueue *sq;
 1202         struct stack **st;
 1203         struct sbuf **td_infos;
 1204         int i, stack_idx, error, stacks_to_allocate;
 1205         bool finished;
 1206 
 1207         error = 0;
 1208         finished = false;
 1209 
 1210         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
 1211         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
 1212 
 1213         stacks_to_allocate = 10;
 1214         for (i = 0; i < 3 && !finished ; i++) {
 1215                 /* We cannot malloc while holding the queue's spinlock, so
 1216                  * we do our mallocs now, and hope it is enough.  If it
 1217                  * isn't, we will free these, drop the lock, malloc more,
 1218                  * and try again, up to a point.  After that point we will
 1219                  * give up and report ENOMEM. We also cannot write to sb
 1220                  * during this time since the client may have set the
 1221                  * SBUF_AUTOEXTEND flag on their sbuf, which could cause a
 1222                  * malloc as we print to it.  So we defer actually printing
 1223                  * to sb until after we drop the spinlock.
 1224                  */
 1225 
 1226                 /* Where we will store the stacks. */
 1227                 st = malloc(sizeof(struct stack *) * stacks_to_allocate,
 1228                     M_TEMP, M_WAITOK);
 1229                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1230                     stack_idx++)
 1231                         st[stack_idx] = stack_create(M_WAITOK);
 1232 
 1233                 /* Where we will store the td name, tid, etc. */
 1234                 td_infos = malloc(sizeof(struct sbuf *) * stacks_to_allocate,
 1235                     M_TEMP, M_WAITOK);
 1236                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1237                     stack_idx++)
 1238                         td_infos[stack_idx] = sbuf_new(NULL, NULL,
 1239                             MAXCOMLEN + sizeof(struct thread *) * 2 + 40,
 1240                             SBUF_FIXEDLEN);
 1241 
 1242                 sleepq_lock(wchan);
 1243                 sq = sleepq_lookup(wchan);
 1244                 if (sq == NULL) {
 1245                         /* This sleepq does not exist; exit and return ENOENT. */
 1246                         error = ENOENT;
 1247                         finished = true;
 1248                         sleepq_release(wchan);
 1249                         goto loop_end;
 1250                 }
 1251 
 1252                 stack_idx = 0;
 1253                 /* Save thread info */
 1254                 TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq,
 1255                     td_next) {
 1256                         if (stack_idx >= stacks_to_allocate)
 1257                                 goto loop_end;
 1258 
 1259                         /* Note the td_lock is equal to the sleepq_lock here. */
 1260                         (void)stack_save_td(st[stack_idx], td);
 1261 
 1262                         sbuf_printf(td_infos[stack_idx], "%d: %s %p",
 1263                             td->td_tid, td->td_name, td);
 1264 
 1265                         ++stack_idx;
 1266                 }
 1267 
 1268                 finished = true;
 1269                 sleepq_release(wchan);
 1270 
 1271                 /* Print the stacks */
 1272                 for (i = 0; i < stack_idx; i++) {
 1273                         sbuf_finish(td_infos[i]);
 1274                         sbuf_printf(sb, "--- thread %s: ---\n", sbuf_data(td_infos[i]));
 1275                         stack_sbuf_print(sb, st[i]);
 1276                         sbuf_printf(sb, "\n");
 1277 
 1278                         error = sbuf_error(sb);
 1279                         if (error == 0)
 1280                                 *count_stacks_printed = stack_idx;
 1281                 }
 1282 
 1283 loop_end:
 1284                 if (!finished)
 1285                         sleepq_release(wchan);
 1286                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1287                     stack_idx++)
 1288                         stack_destroy(st[stack_idx]);
 1289                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1290                     stack_idx++)
 1291                         sbuf_delete(td_infos[stack_idx]);
 1292                 free(st, M_TEMP);
 1293                 free(td_infos, M_TEMP);
 1294                 stacks_to_allocate *= 10;
 1295         }
 1296 
 1297         if (!finished && error == 0)
 1298                 error = ENOMEM;
 1299 
 1300         return (error);
 1301 }
 1302 #endif
 1303 
 1304 #ifdef SLEEPQUEUE_PROFILING
 1305 #define SLEEPQ_PROF_LOCATIONS   1024    /* entries in sleepq_profent[] */
 1306 #define SLEEPQ_SBUFSIZE         512     /* size passed to sbuf_new_for_sysctl() */
      /* One profiling record: how often a given wait message was seen. */
 1307 struct sleepq_prof {
 1308         LIST_ENTRY(sleepq_prof) sp_link;        /* free list or hash bucket linkage */
 1309         const char      *sp_wmesg;              /* wait message, compared by pointer */
 1310         long            sp_count;               /* incremented by sleepq_profile() */
 1311 };
 1312 
 1313 LIST_HEAD(sqphead, sleepq_prof);
 1314 
      /* Records not currently assigned to a wait message. */
 1315 struct sqphead sleepq_prof_free;
      /* Records in use, bucketed by SC_HASH(wmesg). */
 1316 struct sqphead sleepq_hash[SC_TABLESIZE];
      /* Static backing storage for all records. */
 1317 static struct sleepq_prof sleepq_profent[SLEEPQ_PROF_LOCATIONS];
      /* Spin mutex taken around updates of the lists above and prof_enabled. */
 1318 static struct mtx sleepq_prof_lock;
 1319 MTX_SYSINIT(sleepq_prof_lock, &sleepq_prof_lock, "sleepq_prof", MTX_SPIN);
 1320 
 1321 static void
 1322 sleepq_profile(const char *wmesg)
 1323 {
 1324         struct sleepq_prof *sp;
 1325 
 1326         mtx_lock_spin(&sleepq_prof_lock);
 1327         if (prof_enabled == 0)
 1328                 goto unlock;
 1329         LIST_FOREACH(sp, &sleepq_hash[SC_HASH(wmesg)], sp_link)
 1330                 if (sp->sp_wmesg == wmesg)
 1331                         goto done;
 1332         sp = LIST_FIRST(&sleepq_prof_free);
 1333         if (sp == NULL)
 1334                 goto unlock;
 1335         sp->sp_wmesg = wmesg;
 1336         LIST_REMOVE(sp, sp_link);
 1337         LIST_INSERT_HEAD(&sleepq_hash[SC_HASH(wmesg)], sp, sp_link);
 1338 done:
 1339         sp->sp_count++;
 1340 unlock:
 1341         mtx_unlock_spin(&sleepq_prof_lock);
 1342         return;
 1343 }
 1344 
 1345 static void
 1346 sleepq_prof_reset(void)
 1347 {
 1348         struct sleepq_prof *sp;
 1349         int enabled;
 1350         int i;
 1351 
 1352         mtx_lock_spin(&sleepq_prof_lock);
 1353         enabled = prof_enabled;
 1354         prof_enabled = 0;
 1355         for (i = 0; i < SC_TABLESIZE; i++)
 1356                 LIST_INIT(&sleepq_hash[i]);
 1357         LIST_INIT(&sleepq_prof_free);
 1358         for (i = 0; i < SLEEPQ_PROF_LOCATIONS; i++) {
 1359                 sp = &sleepq_profent[i];
 1360                 sp->sp_wmesg = NULL;
 1361                 sp->sp_count = 0;
 1362                 LIST_INSERT_HEAD(&sleepq_prof_free, sp, sp_link);
 1363         }
 1364         prof_enabled = enabled;
 1365         mtx_unlock_spin(&sleepq_prof_lock);
 1366 }
 1367 
 1368 static int
 1369 enable_sleepq_prof(SYSCTL_HANDLER_ARGS)
 1370 {
 1371         int error, v;
 1372 
 1373         v = prof_enabled;
 1374         error = sysctl_handle_int(oidp, &v, v, req);
 1375         if (error)
 1376                 return (error);
 1377         if (req->newptr == NULL)
 1378                 return (error);
 1379         if (v == prof_enabled)
 1380                 return (0);
 1381         if (v == 1)
 1382                 sleepq_prof_reset();
 1383         mtx_lock_spin(&sleepq_prof_lock);
 1384         prof_enabled = !!v;
 1385         mtx_unlock_spin(&sleepq_prof_lock);
 1386 
 1387         return (0);
 1388 }
 1389 
 1390 static int
 1391 reset_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
 1392 {
 1393         int error, v;
 1394 
 1395         v = 0;
 1396         error = sysctl_handle_int(oidp, &v, 0, req);
 1397         if (error)
 1398                 return (error);
 1399         if (req->newptr == NULL)
 1400                 return (error);
 1401         if (v == 0)
 1402                 return (0);
 1403         sleepq_prof_reset();
 1404 
 1405         return (0);
 1406 }
 1407 
 1408 static int
 1409 dump_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
 1410 {
 1411         struct sleepq_prof *sp;
 1412         struct sbuf *sb;
 1413         int enabled;
 1414         int error;
 1415         int i;
 1416 
 1417         error = sysctl_wire_old_buffer(req, 0);
 1418         if (error != 0)
 1419                 return (error);
 1420         sb = sbuf_new_for_sysctl(NULL, NULL, SLEEPQ_SBUFSIZE, req);
 1421         sbuf_printf(sb, "\nwmesg\tcount\n");
 1422         enabled = prof_enabled;
 1423         mtx_lock_spin(&sleepq_prof_lock);
 1424         prof_enabled = 0;
 1425         mtx_unlock_spin(&sleepq_prof_lock);
 1426         for (i = 0; i < SC_TABLESIZE; i++) {
 1427                 LIST_FOREACH(sp, &sleepq_hash[i], sp_link) {
 1428                         sbuf_printf(sb, "%s\t%ld\n",
 1429                             sp->sp_wmesg, sp->sp_count);
 1430                 }
 1431         }
 1432         mtx_lock_spin(&sleepq_prof_lock);
 1433         prof_enabled = enabled;
 1434         mtx_unlock_spin(&sleepq_prof_lock);
 1435 
 1436         error = sbuf_finish(sb);
 1437         sbuf_delete(sb);
 1438         return (error);
 1439 }
 1440 
 1441 SYSCTL_PROC(_debug_sleepq, OID_AUTO, stats,
 1442     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, NULL, 0,
 1443     dump_sleepq_prof_stats, "A",
 1444     "Sleepqueue profiling statistics");     /* read-only string, not MPSAFE */
 1445 SYSCTL_PROC(_debug_sleepq, OID_AUTO, reset,
 1446     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
 1447     reset_sleepq_prof_stats, "I",
 1448     "Reset sleepqueue profiling statistics");       /* write non-zero to reset */
 1449 SYSCTL_PROC(_debug_sleepq, OID_AUTO, enable,
 1450     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
 1451     enable_sleepq_prof, "I",
 1452     "Enable sleepqueue profiling");         /* stored as 0 or 1 by the handler */
 1453 #endif
 1454 
 1455 #ifdef DDB
 1456 DB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
 1457 {
 1458         struct sleepqueue_chain *sc;
 1459         struct sleepqueue *sq;
 1460 #ifdef INVARIANTS
 1461         struct lock_object *lock;
 1462 #endif
 1463         struct thread *td;
 1464         void *wchan;
 1465         int i;
 1466 
 1467         if (!have_addr)
 1468                 return;
 1469 
 1470         /*
 1471          * First, see if there is an active sleep queue for the wait channel
 1472          * indicated by the address.
 1473          */
 1474         wchan = (void *)addr;
 1475         sc = SC_LOOKUP(wchan);
 1476         LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
 1477                 if (sq->sq_wchan == wchan)
 1478                         goto found;
 1479 
 1480         /*
 1481          * Second, see if there is an active sleep queue at the address
 1482          * indicated.
 1483          */
 1484         for (i = 0; i < SC_TABLESIZE; i++)
 1485                 LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) {
 1486                         if (sq == (struct sleepqueue *)addr)
 1487                                 goto found;
 1488                 }
 1489 
 1490         db_printf("Unable to locate a sleep queue via %p\n", (void *)addr);
 1491         return;
 1492 found:
 1493         db_printf("Wait channel: %p\n", sq->sq_wchan);
 1494         db_printf("Queue type: %d\n", sq->sq_type);
 1495 #ifdef INVARIANTS
 1496         if (sq->sq_lock) {
 1497                 lock = sq->sq_lock;
 1498                 db_printf("Associated Interlock: %p - (%s) %s\n", lock,
 1499                     LOCK_CLASS(lock)->lc_name, lock->lo_name);
 1500         }
 1501 #endif
 1502         db_printf("Blocked threads:\n");
 1503         for (i = 0; i < NR_SLEEPQS; i++) {
 1504                 db_printf("\nQueue[%d]:\n", i);
 1505                 if (TAILQ_EMPTY(&sq->sq_blocked[i]))
 1506                         db_printf("\tempty\n");
 1507                 else
 1508                         TAILQ_FOREACH(td, &sq->sq_blocked[i],
 1509                                       td_slpq) {
 1510                                 db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td,
 1511                                           td->td_tid, td->td_proc->p_pid,
 1512                                           td->td_name);
 1513                         }
 1514                 db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]);
 1515         }
 1516 }
 1517 
 1518 /* Alias 'show sleepqueue' to 'show sleepq'. */
 1519 DB_SHOW_ALIAS(sleepqueue, db_show_sleepqueue);
 1520 #endif

Cache object: a3fbd6447f0305050ce3b599d8daef1d


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.