The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_sleepqueue.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 /*
   29  * Implementation of sleep queues used to hold queues of threads blocked on
   30  * a wait channel.  Sleep queues are different from turnstiles in that wait
   31  * channels are not owned by anyone, so there is no priority propagation.
   32  * Sleep queues can also provide a timeout and can also be interrupted by
   33  * signals.  That said, there are several similarities between the turnstile
   34  * and sleep queue implementations.  (Note: turnstiles were implemented
   35  * first.)  For example, both use a hash table of the same size where each
   36  * bucket is referred to as a "chain" that contains both a spin lock and
   37  * a linked list of queues.  An individual queue is located by using a hash
   38  * to pick a chain, locking the chain, and then walking the chain searching
   39  * for the queue.  This means that a wait channel object does not need to
   40  * embed its queue head just as locks do not embed their turnstile queue
   41  * head.  Threads also carry around a sleep queue that they lend to the
   42  * wait channel when blocking.  Just as in turnstiles, the queue includes
   43  * a free list of the sleep queues of other threads blocked on the same
   44  * wait channel in the case of multiple waiters.
   45  *
   46  * Some additional functionality provided by sleep queues includes the
   47  * ability to set a timeout.  The timeout is managed using a per-thread
   48  * callout that resumes a thread if it is asleep.  A thread may also
   49  * catch signals while it is asleep (aka an interruptible sleep).  The
   50  * signal code uses sleepq_abort() to interrupt a sleeping thread.  Finally,
   51  * sleep queues also provide some extra assertions.  One is not allowed to
   52  * mix the sleep/wakeup and cv APIs for a given wait channel.  Also, one
   53  * must consistently use the same lock to synchronize with a wait channel,
   54  * though this check is currently only a warning for sleep/wakeup due to
   55  * pre-existing abuse of that API.  The same lock must also be held when
   56  * awakening threads, though that is currently only enforced for condition
   57  * variables.
   58  */
   59 
   60 #include <sys/cdefs.h>
   61 __FBSDID("$FreeBSD$");
   62 
   63 #include "opt_sleepqueue_profiling.h"
   64 #include "opt_ddb.h"
   65 #include "opt_sched.h"
   66 #include "opt_stack.h"
   67 
   68 #include <sys/param.h>
   69 #include <sys/systm.h>
   70 #include <sys/lock.h>
   71 #include <sys/kernel.h>
   72 #include <sys/ktr.h>
   73 #include <sys/mutex.h>
   74 #include <sys/proc.h>
   75 #include <sys/sbuf.h>
   76 #include <sys/sched.h>
   77 #include <sys/sdt.h>
   78 #include <sys/signalvar.h>
   79 #include <sys/sleepqueue.h>
   80 #include <sys/stack.h>
   81 #include <sys/sysctl.h>
   82 #include <sys/time.h>
   83 #ifdef EPOCH_TRACE
   84 #include <sys/epoch.h>
   85 #endif
   86 
   87 #include <machine/atomic.h>
   88 
   89 #include <vm/uma.h>
   90 
   91 #ifdef DDB
   92 #include <ddb/ddb.h>
   93 #endif
   94 
   95 /*
   96  * Constants for the hash table of sleep queue chains.
   97  * SC_TABLESIZE must be a power of two for SC_MASK to work properly.
   98  */
   99 #ifndef SC_TABLESIZE
  100 #define SC_TABLESIZE    256
  101 #endif
  102 CTASSERT(powerof2(SC_TABLESIZE));
  103 #define SC_MASK         (SC_TABLESIZE - 1)
  104 #define SC_SHIFT        8
  105 #define SC_HASH(wc)     ((((uintptr_t)(wc) >> SC_SHIFT) ^ (uintptr_t)(wc)) & \
  106                             SC_MASK)
  107 #define SC_LOOKUP(wc)   &sleepq_chains[SC_HASH(wc)]
  108 #define NR_SLEEPQS      2
  109 /*
  110  * There are two different lists of sleep queues.  Both lists are connected
  111  * via the sq_hash entries.  The first list is the sleep queue chain list
  112  * that a sleep queue is on when it is attached to a wait channel.  The
  113  * second list is the free list hung off of a sleep queue that is attached
  114  * to a wait channel.
  115  *
  116  * Each sleep queue also contains the wait channel it is attached to, the
  117  * list of threads blocked on that wait channel, flags specific to the
  118  * wait channel, and the lock used to synchronize with a wait channel.
  119  * The flags are used to catch mismatches between the various consumers
  120  * of the sleep queue API (e.g. sleep/wakeup and condition variables).
  121  * The lock pointer is only used when invariants are enabled for various
  122  * debugging checks.
  123  *
       * sq_blocked[] and sq_blockedcnt[] are indexed by the queue number that
       * callers pass to sleepq_add() (0 .. NR_SLEEPQS - 1).  sq_type holds the
       * SLEEPQ_TYPE bits of the flags supplied by the thread that attached the
       * queue to the wait channel; later sleepers on the same channel are
       * asserted to pass matching bits.
       *
  124  * Locking key:
  125  *  c - sleep queue chain lock
  126  */
  127 struct sleepqueue {
  128         struct threadqueue sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */
  129         u_int sq_blockedcnt[NR_SLEEPQS];        /* (c) N. of blocked threads. */
  130         LIST_ENTRY(sleepqueue) sq_hash;         /* (c) Chain and free list. */
  131         LIST_HEAD(, sleepqueue) sq_free;        /* (c) Free queues. */
  132         const void      *sq_wchan;              /* (c) Wait channel. */
  133         int     sq_type;                        /* (c) Queue type. */
  134 #ifdef INVARIANTS
  135         struct lock_object *sq_lock;            /* (c) Associated lock. */
  136 #endif
  137 };
  138 
      /*
       * One bucket of the sleepq_chains[] hash table: the list of sleep queues
       * whose wait channels hash to this bucket, and the spin lock protecting
       * that list.  With SLEEPQUEUE_PROFILING, sc_depth is bumped in
       * sleepq_add() each time a queue is inserted on sc_queues and dropped in
       * sleepq_remove_thread() when the last waiter takes the queue back;
       * sc_max_depth records the largest value sc_depth has reached.
       */
  139 struct sleepqueue_chain {
  140         LIST_HEAD(, sleepqueue) sc_queues;      /* List of sleep queues. */
  141         struct mtx sc_lock;                     /* Spin lock for this chain. */
  142 #ifdef SLEEPQUEUE_PROFILING
  143         u_int   sc_depth;                       /* Length of sc_queues. */
  144         u_int   sc_max_depth;                   /* Max length of sc_queues. */
  145 #endif
  146 } __aligned(CACHE_LINE_SIZE);
  147 
  148 #ifdef SLEEPQUEUE_PROFILING
  149 u_int sleepq_max_depth;
  150 static SYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  151     "sleepq profiling");
  152 static SYSCTL_NODE(_debug_sleepq, OID_AUTO, chains,
  153     CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
  154     "sleepq chain stats");
  155 SYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth,
  156     0, "maxmimum depth achieved of a single chain");
  157 
  158 static void     sleepq_profile(const char *wmesg);
  159 static int      prof_enabled;
  160 #endif
  161 static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
  162 static uma_zone_t sleepq_zone;
  163 
  164 /*
  165  * Prototypes for non-exported routines.
  166  */
  167 static int      sleepq_catch_signals(const void *wchan, int pri);
  168 static inline int sleepq_check_signals(void);
  169 static inline int sleepq_check_timeout(void);
  170 #ifdef INVARIANTS
  171 static void     sleepq_dtor(void *mem, int size, void *arg);
  172 #endif
  173 static int      sleepq_init(void *mem, int size, int flags);
  174 static int      sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
  175                     int pri, int srqflags);
  176 static void     sleepq_remove_thread(struct sleepqueue *sq, struct thread *td);
  177 static void     sleepq_switch(const void *wchan, int pri);
  178 static void     sleepq_timeout(void *arg);
  179 
  180 SDT_PROBE_DECLARE(sched, , , sleep);
  181 SDT_PROBE_DECLARE(sched, , , wakeup);
  182 
  183 /*
  184  * Initialize SLEEPQUEUE_PROFILING specific sysctl nodes.
  185  * Note that it must happen after sleepinit() has been fully executed, so
  186  * it must happen after SI_SUB_KMEM SYSINIT() subsystem setup.
  187  */
  188 #ifdef SLEEPQUEUE_PROFILING
  189 static void
  190 init_sleepqueue_profiling(void)
  191 {
  192         char chain_name[10];
  193         struct sysctl_oid *chain_oid;
  194         u_int i;
  195 
  196         for (i = 0; i < SC_TABLESIZE; i++) {
  197                 snprintf(chain_name, sizeof(chain_name), "%u", i);
  198                 chain_oid = SYSCTL_ADD_NODE(NULL,
  199                     SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO,
  200                     chain_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
  201                     "sleepq chain stats");
  202                 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  203                     "depth", CTLFLAG_RD, &sleepq_chains[i].sc_depth, 0, NULL);
  204                 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  205                     "max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0,
  206                     NULL);
  207         }
  208 }
  209 
  210 SYSINIT(sleepqueue_profiling, SI_SUB_LOCK, SI_ORDER_ANY,
  211     init_sleepqueue_profiling, NULL);
  212 #endif
  213 
  214 /*
  215  * Early initialization of sleep queues that is called from the sleepinit()
  216  * SYSINIT.
  217  */
  218 void
  219 init_sleepqueues(void)
  220 {
  221         int i;
  222 
  223         for (i = 0; i < SC_TABLESIZE; i++) {
  224                 LIST_INIT(&sleepq_chains[i].sc_queues);
  225                 mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
  226                     MTX_SPIN);
  227         }
  228         sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
  229 #ifdef INVARIANTS
  230             NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
  231 #else
  232             NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
  233 #endif
  234 
  235         thread0.td_sleepqueue = sleepq_alloc();
  236 }
  237 
  238 /*
  239  * Get a sleep queue for a new thread.
  240  */
  241 struct sleepqueue *
  242 sleepq_alloc(void)
  243 {
  244 
  245         return (uma_zalloc(sleepq_zone, M_WAITOK));
  246 }
  247 
  248 /*
  249  * Free a sleep queue when a thread is destroyed.
       *
       * Hands 'sq' back to sleepq_zone, the UMA zone that sleepq_alloc()
       * allocates from.
  250  */
  251 void
  252 sleepq_free(struct sleepqueue *sq)
  253 {
  254 
  255         uma_zfree(sleepq_zone, sq);
  256 }
  257 
  258 /*
  259  * Lock the sleep queue chain associated with the specified wait channel.
  260  */
  261 void
  262 sleepq_lock(const void *wchan)
  263 {
  264         struct sleepqueue_chain *sc;
  265 
  266         sc = SC_LOOKUP(wchan);
  267         mtx_lock_spin(&sc->sc_lock);
  268 }
  269 
  270 /*
  271  * Look up the sleep queue associated with a given wait channel in the hash
  272  * table locking the associated sleep queue chain.  If no queue is found in
  273  * the table, NULL is returned.
  274  */
  275 struct sleepqueue *
  276 sleepq_lookup(const void *wchan)
  277 {
  278         struct sleepqueue_chain *sc;
  279         struct sleepqueue *sq;
  280 
  281         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  282         sc = SC_LOOKUP(wchan);
  283         mtx_assert(&sc->sc_lock, MA_OWNED);
  284         LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
  285                 if (sq->sq_wchan == wchan)
  286                         return (sq);
  287         return (NULL);
  288 }
  289 
  290 /*
  291  * Unlock the sleep queue chain associated with a given wait channel.
  292  */
  293 void
  294 sleepq_release(const void *wchan)
  295 {
  296         struct sleepqueue_chain *sc;
  297 
  298         sc = SC_LOOKUP(wchan);
  299         mtx_unlock_spin(&sc->sc_lock);
  300 }
  301 
  302 /*
  303  * Places the current thread on the sleep queue for the specified wait
  304  * channel.  If INVARIANTS is enabled, then it associates the passed in
  305  * lock with the sleepq to make sure it is held when that sleep queue is
  306  * woken up.
       *
       * The caller must hold the sleep queue chain lock for 'wchan' (asserted).
       * 'wmesg' is recorded in td_wmesg.  'flags' supplies the queue type
       * (its SLEEPQ_TYPE bits) and, when SLEEPQ_INTERRUPTIBLE is set, marks
       * the sleep as interruptible by setting TDF_SINTR and clearing
       * td_intrval.  'queue' selects which of the NR_SLEEPQS blocked lists the
       * thread is appended to.
  307  */
  308 void
  309 sleepq_add(const void *wchan, struct lock_object *lock, const char *wmesg,
  310     int flags, int queue)
  311 {
  312         struct sleepqueue_chain *sc;
  313         struct sleepqueue *sq;
  314         struct thread *td;
  315 
  316         td = curthread;
  317         sc = SC_LOOKUP(wchan);
  318         mtx_assert(&sc->sc_lock, MA_OWNED);
  319         MPASS(td->td_sleepqueue != NULL);
  320         MPASS(wchan != NULL);
  321         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  322 
  323         /* If this thread is not allowed to sleep, die a horrible death. */
  324         if (__predict_false(!THREAD_CAN_SLEEP())) {
  325 #ifdef EPOCH_TRACE
  326                 epoch_trace_list(curthread);
  327 #endif
  328                 KASSERT(0,
  329                     ("%s: td %p to sleep on wchan %p with sleeping prohibited",
  330                     __func__, td, wchan));
  331         }
  332 
  333         /* Look up the sleep queue associated with the wait channel 'wchan'. */
  334         sq = sleepq_lookup(wchan);
  335 
  336         /*
  337          * If the wait channel does not already have a sleep queue, use
  338          * this thread's sleep queue.  Otherwise, insert the current thread
  339          * into the sleep queue already in use by this wait channel.
  340          */
  341         if (sq == NULL) {
  342 #ifdef INVARIANTS
  343                 int i;
  344 
                      /*
                       * The thread's own queue must be pristine before it is
                       * lent to the wait channel: no blocked threads, no spare
                       * queues, and no stale wait channel pointer.
                       */
  345                 sq = td->td_sleepqueue;
  346                 for (i = 0; i < NR_SLEEPQS; i++) {
  347                         KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
  348                             ("thread's sleep queue %d is not empty", i));
  349                         KASSERT(sq->sq_blockedcnt[i] == 0,
  350                             ("thread's sleep queue %d count mismatches", i));
  351                 }
  352                 KASSERT(LIST_EMPTY(&sq->sq_free),
  353                     ("thread's sleep queue has a non-empty free list"));
  354                 KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
  355                 sq->sq_lock = lock;
  356 #endif
  357 #ifdef SLEEPQUEUE_PROFILING
                      /* A new queue joins the chain: track depth high-water marks. */
  358                 sc->sc_depth++;
  359                 if (sc->sc_depth > sc->sc_max_depth) {
  360                         sc->sc_max_depth = sc->sc_depth;
  361                         if (sc->sc_max_depth > sleepq_max_depth)
  362                                 sleepq_max_depth = sc->sc_max_depth;
  363                 }
  364 #endif
                      /* Attach this thread's queue to the wait channel. */
  365                 sq = td->td_sleepqueue;
  366                 LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
  367                 sq->sq_wchan = wchan;
  368                 sq->sq_type = flags & SLEEPQ_TYPE;
  369         } else {
                      /*
                       * The channel already has a queue: check that this sleeper
                       * is compatible with it, then park the thread's own queue
                       * on the free list of the queue in use.
                       */
  370                 MPASS(wchan == sq->sq_wchan);
  371                 MPASS(lock == sq->sq_lock);
  372                 MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
  373                 LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
  374         }
              /*
               * Under the thread lock, enqueue the thread and record the sleep
               * in it.  td_sleepqueue becomes NULL because the thread's queue
               * now belongs to the wait channel (as its queue or as a spare).
               */
  375         thread_lock(td);
  376         TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
  377         sq->sq_blockedcnt[queue]++;
  378         td->td_sleepqueue = NULL;
  379         td->td_sqqueue = queue;
  380         td->td_wchan = wchan;
  381         td->td_wmesg = wmesg;
  382         if (flags & SLEEPQ_INTERRUPTIBLE) {
  383                 td->td_intrval = 0;
  384                 td->td_flags |= TDF_SINTR;
  385         }
  386         td->td_flags &= ~TDF_TIMEOUT;
  387         thread_unlock(td);
  388 }
  389 
  390 /*
  391  * Sets a timeout that will remove the current thread from the specified
  392  * sleep queue once it expires if the thread has not already been awakened.
       *
       * The expiry time and precision are computed by callout_when() from
       * 'sbt', 'pr' and 'flags'; the expiry is stored in td_sleeptimo and the
       * thread's td_slpcallout is armed to run sleepq_timeout() on the current
       * CPU.  The caller must hold the chain lock for 'wchan', and the thread
       * must already have been placed on the sleep queue (both asserted).
  393  */
  394 void
  395 sleepq_set_timeout_sbt(const void *wchan, sbintime_t sbt, sbintime_t pr,
  396     int flags)
  397 {
  398         struct sleepqueue_chain *sc __unused;
  399         struct thread *td;
  400         sbintime_t pr1;
  401 
  402         td = curthread;
  403         sc = SC_LOOKUP(wchan);
  404         mtx_assert(&sc->sc_lock, MA_OWNED);
  405         MPASS(TD_ON_SLEEPQ(td));
  406         MPASS(td->td_sleepqueue == NULL);
  407         MPASS(wchan != NULL);
  408         if (cold && td == &thread0)
  409                 panic("timed sleep before timers are working");
              /* Any earlier timed sleep must have cleared td_sleeptimo. */
  410         KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx",
  411             td->td_tid, td, (uintmax_t)td->td_sleeptimo));
              /* td_sleeptimo is written under the thread lock. */
  412         thread_lock(td);
  413         callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1);
  414         thread_unlock(td);
              /* C_PRECALC: the expiry/precision were already computed above. */
  415         callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1,
  416             sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC |
  417             C_DIRECT_EXEC);
  418 }
  419 
  420 /*
  421  * Return the number of actual sleepers for the specified queue.
  422  */
  423 u_int
  424 sleepq_sleepcnt(const void *wchan, int queue)
  425 {
  426         struct sleepqueue *sq;
  427 
  428         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  429         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  430         sq = sleepq_lookup(wchan);
  431         if (sq == NULL)
  432                 return (0);
  433         return (sq->sq_blockedcnt[queue]);
  434 }
  435 
      /*
       * Check whether thread 'td' has a reason not to go to sleep: a pending
       * TDP_WAKEUP request, a suspension request, or a pending signal.
       *
       * Called with the chain lock 'sc' held.  Every return path leaves both
       * the chain lock and the thread lock held; on the slow path the chain
       * lock is dropped and re-taken around the proc-locked checks.
       *
       * Returns EINTR if TDP_WAKEUP was set (the flag is cleared), 0 if
       * neither TDF_NEEDSIGCHK nor TDF_NEEDSUSPCHK is set, and otherwise the
       * value from sig_ast_checksusp() if non-zero, else the value from
       * sig_ast_needsigchk().
       */
  436 static int
  437 sleepq_check_ast_sc_locked(struct thread *td, struct sleepqueue_chain *sc)
  438 {
  439         struct proc *p;
  440         int ret;
  441 
  442         mtx_assert(&sc->sc_lock, MA_OWNED);
  443 
  444         if ((td->td_pflags & TDP_WAKEUP) != 0) {
  445                 td->td_pflags &= ~TDP_WAKEUP;
  446                 thread_lock(td);
  447                 return (EINTR);
  448         }
  449 
  450         /*
  451          * See if there are any pending signals or suspension requests for this
  452          * thread.  If not, we can switch immediately.
  453          */
  454         thread_lock(td);
  455         if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) == 0)
  456                 return (0);
  457 
              /* Slow path: drop both spin locks before taking the proc lock. */
  458         thread_unlock(td);
  459         mtx_unlock_spin(&sc->sc_lock);
  460 
  461         p = td->td_proc;
  462         CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)",
  463             (void *)td, (long)p->p_pid, td->td_name);
  464         PROC_LOCK(p);
  465 
  466         /*
  467          * Check for suspension first. Checking for signals and then
  468          * suspending could result in a missed signal, since a signal
  469          * can be delivered while this thread is suspended.
  470          */
  471         ret = sig_ast_checksusp(td);
  472         if (ret != 0) {
  473                 PROC_UNLOCK(p);
  474                 mtx_lock_spin(&sc->sc_lock);
  475                 thread_lock(td);
  476                 return (ret);
  477         }
  478 
  479         ret = sig_ast_needsigchk(td);
  480 
  481         /*
  482          * Lock the per-process spinlock prior to dropping the
  483          * PROC_LOCK to avoid a signal delivery race.
  484          * PROC_LOCK, PROC_SLOCK, and thread_lock() are
  485          * currently held in tdsendsignal().
  486          */
  487         PROC_SLOCK(p);
  488         mtx_lock_spin(&sc->sc_lock);
  489         PROC_UNLOCK(p);
  490         thread_lock(td);
  491         PROC_SUNLOCK(p);
  492 
  493         return (ret);
  494 }
  495 
  496 /*
  497  * Marks the pending sleep of the current thread as interruptible and
  498  * makes an initial check for pending signals before putting a thread
  499  * to sleep.
       *
       * Called with the chain lock for 'wchan' held (asserted); the thread
       * lock is acquired inside sleepq_check_ast_sc_locked().  If that check
       * returns 0 the thread is handed to sleepq_switch().  Otherwise the
       * thread is taken off the sleep queue if it is still on it, both the
       * chain lock and the thread lock are released, and the non-zero value
       * is returned.
       * NOTE(review): the lock state after sleepq_switch() returns from an
       * actual sleep depends on mi_switch() and is not visible here.
  501  */
  502 static int
  503 sleepq_catch_signals(const void *wchan, int pri)
  504 {
  505         struct thread *td;
  506         struct sleepqueue_chain *sc;
  507         struct sleepqueue *sq;
  508         int ret;
  509 
  510         sc = SC_LOOKUP(wchan);
  511         mtx_assert(&sc->sc_lock, MA_OWNED);
  512         MPASS(wchan != NULL);
  513         td = curthread;
  514 
  515         ret = sleepq_check_ast_sc_locked(td, sc);
  516         THREAD_LOCK_ASSERT(td, MA_OWNED);
  517         mtx_assert(&sc->sc_lock, MA_OWNED);
  518 
  519         if (ret == 0) {
  520                 /*
  521                  * No pending signals and no suspension requests found.
  522                  * Switch the thread off the cpu.
  523                  */
  524                 sleepq_switch(wchan, pri);
  525         } else {
  526                 /*
  527                  * There were pending signals and this thread is still
  528                  * on the sleep queue, remove it from the sleep queue.
  529                  */
  530                 if (TD_ON_SLEEPQ(td)) {
  531                         sq = sleepq_lookup(wchan);
  532                         sleepq_remove_thread(sq, td);
  533                 }
                      /* Two distinct locks are held here, so release both. */
  534                 MPASS(td->td_lock != &sc->sc_lock);
  535                 mtx_unlock_spin(&sc->sc_lock);
  536                 thread_unlock(td);
  537         }
  538         return (ret);
  539 }
  540 
  541 /*
  542  * Switches to another thread if we are still asleep on a sleep queue.
       *
       * Called with both the chain lock for 'wchan' and the thread lock held
       * (asserted).  The two early-return paths below (already woken up, or
       * timed out / real-time clock changed) release both locks before
       * returning.  On the sleeping path the thread lock is switched to the
       * chain lock via thread_lock_set() before mi_switch() is called.
       * NOTE(review): which locks are held once mi_switch() returns is not
       * visible in this function; confirm against mi_switch().
  544  */
  545 static void
  546 sleepq_switch(const void *wchan, int pri)
  547 {
  548         struct sleepqueue_chain *sc;
  549         struct sleepqueue *sq;
  550         struct thread *td;
  551         bool rtc_changed;
  552 
  553         td = curthread;
  554         sc = SC_LOOKUP(wchan);
  555         mtx_assert(&sc->sc_lock, MA_OWNED);
  556         THREAD_LOCK_ASSERT(td, MA_OWNED);
  557 
  558         /*
  559          * If we have a sleep queue, then we've already been woken up, so
  560          * just return.
  561          */
  562         if (td->td_sleepqueue != NULL) {
  563                 mtx_unlock_spin(&sc->sc_lock);
  564                 thread_unlock(td);
  565                 return;
  566         }
  567 
  568         /*
  569          * If TDF_TIMEOUT is set, then our sleep has been timed out
  570          * already but we are still on the sleep queue, so dequeue the
  571          * thread and return.
  572          *
  573          * Do the same if the real-time clock has been adjusted since this
  574          * thread calculated its timeout based on that clock.  This handles
  575          * the following race:
  576          * - The Ts thread needs to sleep until an absolute real-clock time.
  577          *   It copies the global rtc_generation into curthread->td_rtcgen,
  578          *   reads the RTC, and calculates a sleep duration based on that time.
  579          *   See umtxq_sleep() for an example.
  580          * - The Tc thread adjusts the RTC, bumps rtc_generation, and wakes
  581          *   threads that are sleeping until an absolute real-clock time.
  582          *   See tc_setclock() and the POSIX specification of clock_settime().
  583          * - Ts reaches the code below.  It holds the sleepqueue chain lock,
  584          *   so Tc has finished waking, so this thread must test td_rtcgen.
  585          * (The declaration of td_rtcgen refers to this comment.)
  586          */
  587         rtc_changed = td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation;
  588         if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) {
  589                 if (rtc_changed) {
  590                         td->td_rtcgen = 0;
  591                 }
  592                 MPASS(TD_ON_SLEEPQ(td));
  593                 sq = sleepq_lookup(wchan);
  594                 sleepq_remove_thread(sq, td);
  595                 mtx_unlock_spin(&sc->sc_lock);
  596                 thread_unlock(td);
  597                 return;
  598         }
  599 #ifdef SLEEPQUEUE_PROFILING
  600         if (prof_enabled)
  601                 sleepq_profile(td->td_wmesg);
  602 #endif
              /* Still queued and not woken: actually go to sleep. */
  603         MPASS(td->td_sleepqueue == NULL);
  604         sched_sleep(td, pri);
  605         thread_lock_set(td, &sc->sc_lock);
  606         SDT_PROBE0(sched, , , sleep);
  607         TD_SET_SLEEPING(td);
  608         mi_switch(SW_VOL | SWT_SLEEPQ);
              /* Execution resumes here once the thread has been made runnable. */
  609         KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
  610         CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
  611             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
  612 }
  613 
  614 /*
  615  * Check to see if we timed out.
  616  */
  617 static inline int
  618 sleepq_check_timeout(void)
  619 {
  620         struct thread *td;
  621         int res;
  622 
  623         res = 0;
  624         td = curthread;
  625         if (td->td_sleeptimo != 0) {
  626                 if (td->td_sleeptimo <= sbinuptime())
  627                         res = EWOULDBLOCK;
  628                 td->td_sleeptimo = 0;
  629         }
  630         return (res);
  631 }
  632 
  633 /*
  634  * Check to see if we were awoken by a signal.
  635  */
  636 static inline int
  637 sleepq_check_signals(void)
  638 {
  639         struct thread *td;
  640 
  641         td = curthread;
  642         KASSERT((td->td_flags & TDF_SINTR) == 0,
  643             ("thread %p still in interruptible sleep?", td));
  644 
  645         return (td->td_intrval);
  646 }
  647 
  648 /*
  649  * Block the current thread until it is awakened from its sleep queue.
  650  */
  651 void
  652 sleepq_wait(const void *wchan, int pri)
  653 {
  654         struct thread *td;
  655 
  656         td = curthread;
  657         MPASS(!(td->td_flags & TDF_SINTR));
  658         thread_lock(td);
  659         sleepq_switch(wchan, pri);
  660 }
  661 
  662 /*
  663  * Block the current thread until it is awakened from its sleep queue
  664  * or it is interrupted by a signal.
  665  */
  666 int
  667 sleepq_wait_sig(const void *wchan, int pri)
  668 {
  669         int rcatch;
  670 
  671         rcatch = sleepq_catch_signals(wchan, pri);
  672         if (rcatch)
  673                 return (rcatch);
  674         return (sleepq_check_signals());
  675 }
  676 
  677 /*
  678  * Block the current thread until it is awakened from its sleep queue
  679  * or it times out while waiting.
  680  */
  681 int
  682 sleepq_timedwait(const void *wchan, int pri)
  683 {
  684         struct thread *td;
  685 
  686         td = curthread;
  687         MPASS(!(td->td_flags & TDF_SINTR));
  688 
  689         thread_lock(td);
  690         sleepq_switch(wchan, pri);
  691 
  692         return (sleepq_check_timeout());
  693 }
  694 
  695 /*
  696  * Block the current thread until it is awakened from its sleep queue,
  697  * it is interrupted by a signal, or it times out waiting to be awakened.
  698  */
  699 int
  700 sleepq_timedwait_sig(const void *wchan, int pri)
  701 {
  702         int rcatch, rvalt, rvals;
  703 
  704         rcatch = sleepq_catch_signals(wchan, pri);
  705         /* We must always call check_timeout() to clear sleeptimo. */
  706         rvalt = sleepq_check_timeout();
  707         rvals = sleepq_check_signals();
  708         if (rcatch)
  709                 return (rcatch);
  710         if (rvals)
  711                 return (rvals);
  712         return (rvalt);
  713 }
  714 
  715 /*
  716  * Returns the type of sleepqueue given a waitchannel.
  717  */
  718 int
  719 sleepq_type(const void *wchan)
  720 {
  721         struct sleepqueue *sq;
  722         int type;
  723 
  724         MPASS(wchan != NULL);
  725 
  726         sq = sleepq_lookup(wchan);
  727         if (sq == NULL)
  728                 return (-1);
  729         type = sq->sq_type;
  730 
  731         return (type);
  732 }
  733 
  734 /*
  735  * Removes a thread from a sleep queue and makes it
  736  * runnable.
  737  *
  738  * Requires the sc chain locked on entry.  If SRQ_HOLD is specified it will
  739  * be locked on return.  Returns without the thread lock held.
       *
       * 'pri', when non-zero, is a priority to apply with sched_prio(); it is
       * applied only if the thread's current td_priority is numerically
       * greater and its base priority class is PRI_TIMESHARE.  'srqflags' is
       * checked for SRQ_HOLD and passed on to setrunnable().
       *
       * Returns the value of setrunnable() when the thread was sleeping, and
       * 0 when it was not (in which case it is only dequeued).
  740  */
  741 static int
  742 sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri,
  743     int srqflags)
  744 {
  745         struct sleepqueue_chain *sc;
  746         bool drop;
  747 
  748         MPASS(td != NULL);
  749         MPASS(sq->sq_wchan != NULL);
  750         MPASS(td->td_wchan == sq->sq_wchan);
  751 
  752         sc = SC_LOOKUP(sq->sq_wchan);
  753         mtx_assert(&sc->sc_lock, MA_OWNED);
  754 
  755         /*
  756          * Avoid recursing on the chain lock.  If the locks don't match we
  757          * need to acquire the thread lock which setrunnable will drop for
  758          * us.  In this case we need to drop the chain lock afterwards.
  759          *
  760          * There is no race that will make td_lock equal to sc_lock because
  761          * we hold sc_lock.
  762          */
  763         drop = false;
  764         if (!TD_IS_SLEEPING(td)) {
  765                 thread_lock(td);
  766                 drop = true;
  767         } else
  768                 thread_lock_block_wait(td);
  769 
  770         /* Remove thread from the sleepq. */
  771         sleepq_remove_thread(sq, td);
  772 
  773         /* If we're done with the sleepqueue release it. */
  774         if ((srqflags & SRQ_HOLD) == 0 && drop)
  775                 mtx_unlock_spin(&sc->sc_lock);
  776 
  777         /* Adjust priority if requested. */
  778         MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
  779         if (pri != 0 && td->td_priority > pri &&
  780             PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
  781                 sched_prio(td, pri);
  782 
  783         /*
  784          * Note that thread td might not be sleeping if it is running
  785          * sleepq_catch_signals() on another CPU or is blocked on its
  786          * proc lock to check signals.  There's no need to mark the
  787          * thread runnable in that case.
  788          */
  789         if (TD_IS_SLEEPING(td)) {
  790                 MPASS(!drop);
  791                 TD_CLR_SLEEPING(td);
  792                 return (setrunnable(td, srqflags));
  793         }
              /* Not sleeping: we took the thread lock ourselves, so release it. */
  794         MPASS(drop);
  795         thread_unlock(td);
  796 
  797         return (0);
  798 }
  799 
      /*
       * Detach thread td from sleep queue sq.
       *
       * The thread is unlinked from the blocked list selected by td_sqqueue
       * and is handed a sleepqueue structure to carry away: sq itself when
       * sq's free list is empty, otherwise the first entry of that free list.
       * When td_sleeptimo is non-zero and TDF_TIMEOUT is not set, the sleep
       * callout is stopped.  Finally the thread's wait message, wait channel
       * and the TDF_SINTR/TDF_TIMEOUT flags are cleared.
       *
       * Both the thread lock and the sleepqueue chain lock are asserted to be
       * held.  This function does not itself make the thread runnable.
       */
  800 static void
  801 sleepq_remove_thread(struct sleepqueue *sq, struct thread *td)
  802 {
  803         struct sleepqueue_chain *sc __unused;
  804 
  805         MPASS(td != NULL);
  806         MPASS(sq->sq_wchan != NULL);
  807         MPASS(td->td_wchan == sq->sq_wchan);
  808         MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0);
  809         THREAD_LOCK_ASSERT(td, MA_OWNED);
  810         sc = SC_LOOKUP(sq->sq_wchan);
  811         mtx_assert(&sc->sc_lock, MA_OWNED);
  812 
  813         SDT_PROBE2(sched, , , wakeup, td, td->td_proc);
  814 
  815         /* Remove the thread from the queue. */
  816         sq->sq_blockedcnt[td->td_sqqueue]--;
  817         TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
  818 
  819         /*
  820          * Get a sleep queue for this thread.  If this is the last waiter,
  821          * use the queue itself and take it out of the chain, otherwise,
  822          * remove a queue from the free list.
  823          */
  824         if (LIST_EMPTY(&sq->sq_free)) {
  825                 td->td_sleepqueue = sq;
  826 #ifdef INVARIANTS
  827                 sq->sq_wchan = NULL;
  828 #endif
  829 #ifdef SLEEPQUEUE_PROFILING
  830                 sc->sc_depth--;
  831 #endif
  832         } else
  833                 td->td_sleepqueue = LIST_FIRST(&sq->sq_free);
              /* Unlink the chosen sleepqueue through its sq_hash linkage. */
  834         LIST_REMOVE(td->td_sleepqueue, sq_hash);
  835 
  836         if ((td->td_flags & TDF_TIMEOUT) == 0 && td->td_sleeptimo != 0)
  837                 /*
  838                  * We ignore the situation where timeout subsystem was
  839                  * unable to stop our callout.  The struct thread is
  840                  * type-stable, the callout will use the correct
  841                  * memory when running.  The checks of the
  842                  * td_sleeptimo value in this function and in
  843                  * sleepq_timeout() ensure that the thread does not
  844                  * get spurious wakeups, even if the callout was reset
  845                  * or thread reused.
  846                  */
  847                 callout_stop(&td->td_slpcallout);
  848 
              /* The thread is no longer associated with any wait channel. */
  849         td->td_wmesg = NULL;
  850         td->td_wchan = NULL;
  851         td->td_flags &= ~(TDF_SINTR | TDF_TIMEOUT);
  852 
  853         CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)",
  854             (void *)td, (long)td->td_proc->p_pid, td->td_name);
  855 }
  856 
  857 #ifdef INVARIANTS
  858 /*
  859  * UMA zone item deallocator.
       *
       * Built only under INVARIANTS.  Asserts that every blocked-thread list
       * of the sleepqueue being released is empty and that its matching
       * counter is zero.  The size and arg parameters are not used.
  860  */
  861 static void
  862 sleepq_dtor(void *mem, int size, void *arg)
  863 {
  864         struct sleepqueue *sq;
  865         int i;
  866 
  867         sq = mem;
  868         for (i = 0; i < NR_SLEEPQS; i++) {
  869                 MPASS(TAILQ_EMPTY(&sq->sq_blocked[i]));
  870                 MPASS(sq->sq_blockedcnt[i] == 0);
  871         }
  872 }
  873 #endif
  874 
  875 /*
  876  * UMA zone item initializer.
       *
       * Zeroes the size bytes at mem, then initializes each of the
       * NR_SLEEPQS blocked-thread lists (with a zero count) and the free
       * list of the sleepqueue stored there.  The flags argument is not
       * used.  Always returns 0.
  877  */
  878 static int
  879 sleepq_init(void *mem, int size, int flags)
  880 {
  881         struct sleepqueue *sq;
  882         int i;
  883 
  884         bzero(mem, size);
  885         sq = mem;
  886         for (i = 0; i < NR_SLEEPQS; i++) {
  887                 TAILQ_INIT(&sq->sq_blocked[i]);
  888                 sq->sq_blockedcnt[i] = 0;
  889         }
  890         LIST_INIT(&sq->sq_free);
  891         return (0);
  892 }
  893 
  894 /*
  895  * Find thread sleeping on a wait channel and resume it.
       *
       * wchan is the wait channel, flags carries the sleep queue type (which
       * must match sq_type) and optionally SLEEPQ_UNFAIR, pri is handed to
       * sleepq_resume_thread(), and queue selects which blocked list of the
       * sleep queue is examined.
       *
       * Returns 0 when no sleep queue exists for wchan; otherwise returns
       * the value of sleepq_resume_thread(), which is called with SRQ_HOLD.
       *
       * NOTE(review): this function takes no lock itself, so the caller
       * presumably holds the sleepqueue chain lock for wchan -- confirm.
       * NOTE(review): both selection paths dereference the first/last entry
       * of the chosen list without an emptiness check, so callers are
       * presumably required to pass a non-empty queue -- confirm.
  896  */
  897 int
  898 sleepq_signal(const void *wchan, int flags, int pri, int queue)
  899 {
  900         struct sleepqueue_chain *sc;
  901         struct sleepqueue *sq;
  902         struct threadqueue *head;
  903         struct thread *td, *besttd;
  904         int wakeup_swapper;
  905 
  906         CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
  907         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  908         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  909         sq = sleepq_lookup(wchan);
  910         if (sq == NULL)
  911                 return (0);
  912         KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
  913             ("%s: mismatch between sleep/wakeup and cv_*", __func__));
  914 
  915         head = &sq->sq_blocked[queue];
  916         if (flags & SLEEPQ_UNFAIR) {
  917                 /*
  918                  * Find the most recently sleeping thread, but try to
  919                  * skip threads still in process of context switch to
  920                  * avoid spinning on the thread lock.
  921                  */
  922                 sc = SC_LOOKUP(wchan);
  923                 besttd = TAILQ_LAST_FAST(head, thread, td_slpq);
                      /*
                       * Walk from the tail towards the head until a thread
                       * whose td_lock is the chain lock is found; if none
                       * is, the walk stops on the first thread in the list.
                       */
  924                 while (besttd->td_lock != &sc->sc_lock) {
  925                         td = TAILQ_PREV_FAST(besttd, head, thread, td_slpq);
  926                         if (td == NULL)
  927                                 break;
  928                         besttd = td;
  929                 }
  930         } else {
  931                 /*
  932                  * Find the highest priority thread on the queue.  If there
  933                  * is a tie, use the thread that first appears in the queue
  934                  * as it has been sleeping the longest since threads are
  935                  * always added to the tail of sleep queues.
  936                  */
  937                 besttd = td = TAILQ_FIRST(head);
                      /* A numerically smaller td_priority wins (strict <). */
  938                 while ((td = TAILQ_NEXT(td, td_slpq)) != NULL) {
  939                         if (td->td_priority < besttd->td_priority)
  940                                 besttd = td;
  941                 }
  942         }
  943         MPASS(besttd != NULL);
  944         wakeup_swapper = sleepq_resume_thread(sq, besttd, pri, SRQ_HOLD);
  945         return (wakeup_swapper);
  946 }
  947 
      /*
       * Predicate for sleepq_remove_matching() that accepts every thread;
       * the td argument is ignored.
       */
  948 static bool
  949 match_any(struct thread *td __unused)
  950 {
  951 
  952         return (true);
  953 }
  954 
  955 /*
  956  * Resume all threads sleeping on a specified wait channel.
       *
       * Looks up the sleep queue for wchan, checks that its type matches the
       * type bits in flags, and resumes every thread on blocked list number
       * queue by calling sleepq_remove_matching() with a predicate that
       * accepts all threads.  pri is passed through unchanged.
       *
       * Returns 0 when there is no sleep queue for wchan, otherwise the value
       * returned by sleepq_remove_matching().
  957  */
  958 int
  959 sleepq_broadcast(const void *wchan, int flags, int pri, int queue)
  960 {
  961         struct sleepqueue *sq;
  962 
  963         CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
  964         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  965         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  966         sq = sleepq_lookup(wchan);
  967         if (sq == NULL)
  968                 return (0);
  969         KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
  970             ("%s: mismatch between sleep/wakeup and cv_*", __func__));
  971 
  972         return (sleepq_remove_matching(sq, queue, match_any, pri));
  973 }
  974 
  975 /*
  976  * Resume threads on the sleep queue that match the given predicate.
       *
       * Every thread on blocked list number queue of sq for which matches()
       * returns true is passed to sleepq_resume_thread() with priority pri
       * and SRQ_HOLD.  The return value is the bitwise OR of the values
       * returned by those calls, or 0 when no thread matched.
  977  */
  978 int
  979 sleepq_remove_matching(struct sleepqueue *sq, int queue,
  980     bool (*matches)(struct thread *), int pri)
  981 {
  982         struct thread *td, *tdn;
  983         int wakeup_swapper;
  984 
  985         /*
  986          * The last thread will be given ownership of sq and may
  987          * re-enqueue itself before sleepq_resume_thread() returns,
  988          * so we must cache the "next" queue item at the beginning
  989          * of the final iteration.
  990          */
  991         wakeup_swapper = 0;
  992         TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
  993                 if (matches(td))
  994                         wakeup_swapper |= sleepq_resume_thread(sq, td, pri,
  995                             SRQ_HOLD);
  996         }
  997 
  998         return (wakeup_swapper);
  999 }
 1000 
 1001 /*
 1002  * Time sleeping threads out.  When the timeout expires, the thread is
 1003  * removed from the sleep queue and made runnable if it is still asleep.
       *
       * arg is the struct thread whose sleep timed out.  Three cases are
       * handled with the thread lock held:
       *  - td_sleeptimo is zero or still in the future: nothing is done;
       *  - the thread is sleeping and on a sleep queue: TDF_TIMEOUT is set
       *    and the thread is resumed through sleepq_resume_thread();
       *  - the thread is on a sleep queue but not yet sleeping: only
       *    TDF_TIMEOUT is set.
 1004  */
 1005 static void
 1006 sleepq_timeout(void *arg)
 1007 {
 1008         struct sleepqueue_chain *sc __unused;
 1009         struct sleepqueue *sq;
 1010         struct thread *td;
 1011         const void *wchan;
 1012         int wakeup_swapper;
 1013 
 1014         td = arg;
 1015         CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
 1016             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
 1017 
 1018         thread_lock(td);
 1019         if (td->td_sleeptimo == 0 || td->td_sleeptimo > sbinuptime()) {
 1020                 /*
 1021                  * The thread does not want a timeout (yet).
 1022                  */
 1023         } else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
 1024                 /*
 1025                  * See if the thread is asleep and get the wait
 1026                  * channel if it is.
 1027                  */
 1028                 wchan = td->td_wchan;
 1029                 sc = SC_LOOKUP(wchan);
 1030                 THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
 1031                 sq = sleepq_lookup(wchan);
 1032                 MPASS(sq != NULL);
 1033                 td->td_flags |= TDF_TIMEOUT;
                      /*
                       * This path returns without calling thread_unlock():
                       * per the comment in sleepq_resume_thread(), the
                       * thread lock is dropped by setrunnable() there.
                       */
 1034                 wakeup_swapper = sleepq_resume_thread(sq, td, 0, 0);
 1035                 if (wakeup_swapper)
 1036                         kick_proc0();
 1037                 return;
 1038         } else if (TD_ON_SLEEPQ(td)) {
 1039                 /*
 1040                  * If the thread is on the SLEEPQ but isn't sleeping
 1041                  * yet, it can either be on another CPU in between
 1042                  * sleepq_add() and one of the sleepq_*wait*()
 1043                  * routines or it can be in sleepq_catch_signals().
 1044                  */
 1045                 td->td_flags |= TDF_TIMEOUT;
 1046         }
 1047         thread_unlock(td);
 1048 }
 1049 
 1050 /*
 1051  * Resumes a specific thread from the sleep queue associated with a specific
 1052  * wait channel if it is on that queue.
       *
       * Takes the sleepqueue chain lock for wchan itself.  If td is not on a
       * sleep queue, or its td_wchan differs from wchan, the lock is released
       * and nothing else happens.  Otherwise the thread is resumed with
       * sleepq_resume_thread() and kick_proc0() is called when that function
       * returns non-zero.
 1053  */
 1054 void
 1055 sleepq_remove(struct thread *td, const void *wchan)
 1056 {
 1057         struct sleepqueue_chain *sc;
 1058         struct sleepqueue *sq;
 1059         int wakeup_swapper;
 1060 
 1061         /*
 1062          * Look up the sleep queue for this wait channel, then re-check
 1063          * that the thread is asleep on that channel, if it is not, then
 1064          * bail.
 1065          */
 1066         MPASS(wchan != NULL);
 1067         sc = SC_LOOKUP(wchan);
 1068         mtx_lock_spin(&sc->sc_lock);
 1069         /*
 1070          * We can not lock the thread here as it may be sleeping on a
 1071          * different sleepq.  However, holding the sleepq lock for this
 1072          * wchan can guarantee that we do not miss a wakeup for this
 1073          * channel.  The asserts below will catch any false positives.
 1074          */
 1075         if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) {
 1076                 mtx_unlock_spin(&sc->sc_lock);
 1077                 return;
 1078         }
 1079 
 1080         /* Thread is asleep on sleep queue sq, so wake it up. */
 1081         sq = sleepq_lookup(wchan);
 1082         MPASS(sq != NULL);
 1083         MPASS(td->td_wchan == wchan);
              /*
               * No explicit unlock follows: sleepq_resume_thread() is called
               * without SRQ_HOLD and presumably leaves the chain lock
               * released on return -- NOTE(review): confirm.
               */
 1084         wakeup_swapper = sleepq_resume_thread(sq, td, 0, 0);
 1085         if (wakeup_swapper)
 1086                 kick_proc0();
 1087 }
 1088 
 1089 /*
 1090  * Abort a thread as if an interrupt had occurred.  Only abort
 1091  * interruptible waits (unfortunately it isn't safe to abort others).
 1092  *
 1093  * Requires thread lock on entry, releases on return.
       *
       * intrval must be EINTR or ERESTART and is stored in td_intrval unless
       * TDF_TIMEOUT is already set.  Returns 0 when TDF_TIMEOUT is set or
       * the thread is not yet sleeping; otherwise returns the value of
       * sleepq_resume_thread().
 1094  */
 1095 int
 1096 sleepq_abort(struct thread *td, int intrval)
 1097 {
 1098         struct sleepqueue *sq;
 1099         const void *wchan;
 1100 
 1101         THREAD_LOCK_ASSERT(td, MA_OWNED);
 1102         MPASS(TD_ON_SLEEPQ(td));
 1103         MPASS(td->td_flags & TDF_SINTR);
 1104         MPASS(intrval == EINTR || intrval == ERESTART);
 1105 
 1106         /*
 1107          * If the TDF_TIMEOUT flag is set, just leave. A
 1108          * timeout is scheduled anyhow.
 1109          */
 1110         if (td->td_flags & TDF_TIMEOUT) {
 1111                 thread_unlock(td);
 1112                 return (0);
 1113         }
 1114 
 1115         CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)",
 1116             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
 1117         td->td_intrval = intrval;
 1118 
 1119         /*
 1120          * If the thread has not slept yet it will find the signal in
 1121          * sleepq_catch_signals() and call sleepq_resume_thread.  Otherwise
 1122          * we have to do it here.
 1123          */
 1124         if (!TD_IS_SLEEPING(td)) {
 1125                 thread_unlock(td);
 1126                 return (0);
 1127         }
 1128         wchan = td->td_wchan;
 1129         MPASS(wchan != NULL);
 1130         sq = sleepq_lookup(wchan);
 1131         MPASS(sq != NULL);
 1132 
 1133         /* Thread is asleep on sleep queue sq, so wake it up. */
 1134         return (sleepq_resume_thread(sq, td, 0, 0));
 1135 }
 1136 
      /*
       * Resume, across every sleepqueue chain, all threads for which
       * matches() returns true.
       *
       * Each chain in sleepq_chains[] whose queue list is non-empty is locked
       * in turn, and sleepq_remove_matching() is applied to every blocked
       * list of every sleep queue on it with a priority argument of 0.
       * kick_proc0() is called once at the end if any of those calls
       * returned non-zero.
       */
 1137 void
 1138 sleepq_chains_remove_matching(bool (*matches)(struct thread *))
 1139 {
 1140         struct sleepqueue_chain *sc;
 1141         struct sleepqueue *sq, *sq1;
 1142         int i, wakeup_swapper;
 1143 
 1144         wakeup_swapper = 0;
 1145         for (sc = &sleepq_chains[0]; sc < sleepq_chains + SC_TABLESIZE; ++sc) {
                      /* This emptiness check is made before the lock is taken. */
 1146                 if (LIST_EMPTY(&sc->sc_queues)) {
 1147                         continue;
 1148                 }
 1149                 mtx_lock_spin(&sc->sc_lock);
 1150                 LIST_FOREACH_SAFE(sq, &sc->sc_queues, sq_hash, sq1) {
 1151                         for (i = 0; i < NR_SLEEPQS; ++i) {
 1152                                 wakeup_swapper |= sleepq_remove_matching(sq, i,
 1153                                     matches, 0);
 1154                         }
 1155                 }
 1156                 mtx_unlock_spin(&sc->sc_lock);
 1157         }
 1158         if (wakeup_swapper) {
 1159                 kick_proc0();
 1160         }
 1161 }
 1162 
 1163 /*
 1164  * Prints the stacks of all threads presently sleeping on wchan/queue to
 1165  * the sbuf sb.  Sets count_stacks_printed to the number of stacks actually
 1166  * printed.  Typically, this will equal the number of threads sleeping on the
 1167  * queue, but may be less if sb overflowed before all stacks were printed.
       *
       * *count_stacks_printed is written only after a stack has been printed
       * with sb still error-free, so callers should initialize it themselves;
       * it is left untouched when nothing could be printed.
       *
       * Returns 0 on success, ENOENT when no sleep queue exists for wchan,
       * ENOMEM when the queue still held more threads than preallocated
       * slots after three attempts (10, 100 and 1000 slots), or the error
       * reported by sbuf_error(sb) after printing.
 1168  */
 1169 #ifdef STACK
 1170 int
 1171 sleepq_sbuf_print_stacks(struct sbuf *sb, const void *wchan, int queue,
 1172     int *count_stacks_printed)
 1173 {
 1174         struct thread *td, *td_next;
 1175         struct sleepqueue *sq;
 1176         struct stack **st;
 1177         struct sbuf **td_infos;
 1178         int i, stack_idx, error, stacks_to_allocate;
 1179         bool finished;
 1180 
 1181         error = 0;
 1182         finished = false;
 1183 
 1184         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
 1185         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
 1186 
 1187         stacks_to_allocate = 10;
 1188         for (i = 0; i < 3 && !finished ; i++) {
 1189                 /* We cannot malloc while holding the queue's spinlock, so
 1190                  * we do our mallocs now, and hope it is enough.  If it
 1191                  * isn't, we will free these, drop the lock, malloc more,
 1192                  * and try again, up to a point.  After that point we will
 1193                  * give up and report ENOMEM. We also cannot write to sb
 1194                  * during this time since the client may have set the
 1195                  * SBUF_AUTOEXTEND flag on their sbuf, which could cause a
 1196                  * malloc as we print to it.  So we defer actually printing
 1197                  * to sb until after we drop the spinlock.
 1198                  */
 1199 
 1200                 /* Where we will store the stacks. */
 1201                 st = malloc(sizeof(struct stack *) * stacks_to_allocate,
 1202                     M_TEMP, M_WAITOK);
 1203                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1204                     stack_idx++)
 1205                         st[stack_idx] = stack_create(M_WAITOK);
 1206 
 1207                 /* Where we will store the td name, tid, etc. */
 1208                 td_infos = malloc(sizeof(struct sbuf *) * stacks_to_allocate,
 1209                     M_TEMP, M_WAITOK);
 1210                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1211                     stack_idx++)
 1212                         td_infos[stack_idx] = sbuf_new(NULL, NULL,
 1213                             MAXCOMLEN + sizeof(struct thread *) * 2 + 40,
 1214                             SBUF_FIXEDLEN);
 1215 
 1216                 sleepq_lock(wchan);
 1217                 sq = sleepq_lookup(wchan);
 1218                 if (sq == NULL) {
 1219                         /* This sleepq does not exist; exit and return ENOENT. */
 1220                         error = ENOENT;
 1221                         finished = true;
 1222                         sleepq_release(wchan);
 1223                         goto loop_end;
 1224                 }
 1225 
 1226                 stack_idx = 0;
 1227                 /* Save thread info */
 1228                 TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq,
 1229                     td_next) {
                              /*
                               * Out of preallocated slots: leave with
                               * finished still false so the lock is dropped
                               * at loop_end and the outer loop retries with
                               * ten times as many slots.
                               */
 1230                         if (stack_idx >= stacks_to_allocate)
 1231                                 goto loop_end;
 1232 
 1233                         /* Note the td_lock is equal to the sleepq_lock here. */
 1234                         (void)stack_save_td(st[stack_idx], td);
 1235 
 1236                         sbuf_printf(td_infos[stack_idx], "%d: %s %p",
 1237                             td->td_tid, td->td_name, td);
 1238 
 1239                         ++stack_idx;
 1240                 }
 1241 
 1242                 finished = true;
 1243                 sleepq_release(wchan);
 1244 
 1245                 /* Print the stacks */
                      /*
                       * The outer loop counter i is reused here; that is
                       * harmless because finished is already true and the
                       * outer loop will not run again.
                       */
 1246                 for (i = 0; i < stack_idx; i++) {
 1247                         sbuf_finish(td_infos[i]);
 1248                         sbuf_printf(sb, "--- thread %s: ---\n", sbuf_data(td_infos[i]));
 1249                         stack_sbuf_print(sb, st[i]);
 1250                         sbuf_printf(sb, "\n");
 1251 
                              /*
                               * Record how many stacks have been printed so
                               * far rather than how many were captured, so
                               * that a failure of sb part-way through leaves
                               * the count at the last stack printed without
                               * error.
                               */
 1252                         error = sbuf_error(sb);
 1253                         if (error == 0)
 1254                                 *count_stacks_printed = i + 1;
 1255                 }
 1256 
 1257 loop_end:
 1258                 if (!finished)
 1259                         sleepq_release(wchan);
 1260                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1261                     stack_idx++)
 1262                         stack_destroy(st[stack_idx]);
 1263                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1264                     stack_idx++)
 1265                         sbuf_delete(td_infos[stack_idx]);
 1266                 free(st, M_TEMP);
 1267                 free(td_infos, M_TEMP);
 1268                 stacks_to_allocate *= 10;
 1269         }
 1270 
 1271         if (!finished && error == 0)
 1272                 error = ENOMEM;
 1273 
 1274         return (error);
 1275 }
 1276 #endif
 1277 
 1278 #ifdef SLEEPQUEUE_PROFILING
 1279 #define SLEEPQ_PROF_LOCATIONS   1024
 1280 #define SLEEPQ_SBUFSIZE         512
      /*
       * SLEEPQ_PROF_LOCATIONS sizes the static pool of profiling records
       * below; SLEEPQ_SBUFSIZE is the length handed to sbuf_new_for_sysctl()
       * when the statistics are dumped.
       */

      /* One profiling record: a wait message pointer and its hit counter. */
 1281 struct sleepq_prof {
 1282         LIST_ENTRY(sleepq_prof) sp_link;
 1283         const char      *sp_wmesg;
 1284         long            sp_count;
 1285 };
 1286 
 1287 LIST_HEAD(sqphead, sleepq_prof);
 1288 
      /*
       * sleepq_prof_free holds unused records, sleepq_hash[] holds records in
       * use bucketed by SC_HASH() of the wait message pointer, and
       * sleepq_profent[] is the backing storage for both.  sleepq_prof_lock
       * is the spin mutex taken by the profiling routines in this file.
       */
 1289 struct sqphead sleepq_prof_free;
 1290 struct sqphead sleepq_hash[SC_TABLESIZE];
 1291 static struct sleepq_prof sleepq_profent[SLEEPQ_PROF_LOCATIONS];
 1292 static struct mtx sleepq_prof_lock;
 1293 MTX_SYSINIT(sleepq_prof_lock, &sleepq_prof_lock, "sleepq_prof", MTX_SPIN);
 1294 
      /*
       * Count one sleep on wait message wmesg.
       *
       * With sleepq_prof_lock held: does nothing when prof_enabled is zero;
       * otherwise looks for a record whose sp_wmesg pointer equals wmesg in
       * the hash bucket for wmesg.  If none exists, a record is taken from
       * the free list and moved into that bucket; when the free list is
       * empty the sample is dropped.  The record's counter is then
       * incremented.
       */
 1295 static void
 1296 sleepq_profile(const char *wmesg)
 1297 {
 1298         struct sleepq_prof *sp;
 1299 
 1300         mtx_lock_spin(&sleepq_prof_lock);
 1301         if (prof_enabled == 0)
 1302                 goto unlock;
              /* Records are matched by pointer identity, not string contents. */
 1303         LIST_FOREACH(sp, &sleepq_hash[SC_HASH(wmesg)], sp_link)
 1304                 if (sp->sp_wmesg == wmesg)
 1305                         goto done;
 1306         sp = LIST_FIRST(&sleepq_prof_free);
 1307         if (sp == NULL)
 1308                 goto unlock;
 1309         sp->sp_wmesg = wmesg;
 1310         LIST_REMOVE(sp, sp_link);
 1311         LIST_INSERT_HEAD(&sleepq_hash[SC_HASH(wmesg)], sp, sp_link);
 1312 done:
 1313         sp->sp_count++;
 1314 unlock:
 1315         mtx_unlock_spin(&sleepq_prof_lock);
 1316         return;
 1317 }
 1318 
      /*
       * Discard all profiling data.
       *
       * With sleepq_prof_lock held, profiling is switched off, every hash
       * bucket and the free list are re-initialized, every record in
       * sleepq_profent[] is cleared and put back on the free list, and
       * prof_enabled is restored to the value it had on entry.
       */
 1319 static void
 1320 sleepq_prof_reset(void)
 1321 {
 1322         struct sleepq_prof *sp;
 1323         int enabled;
 1324         int i;
 1325 
 1326         mtx_lock_spin(&sleepq_prof_lock);
 1327         enabled = prof_enabled;
 1328         prof_enabled = 0;
 1329         for (i = 0; i < SC_TABLESIZE; i++)
 1330                 LIST_INIT(&sleepq_hash[i]);
 1331         LIST_INIT(&sleepq_prof_free);
 1332         for (i = 0; i < SLEEPQ_PROF_LOCATIONS; i++) {
 1333                 sp = &sleepq_profent[i];
 1334                 sp->sp_wmesg = NULL;
 1335                 sp->sp_count = 0;
 1336                 LIST_INSERT_HEAD(&sleepq_prof_free, sp, sp_link);
 1337         }
 1338         prof_enabled = enabled;
 1339         mtx_unlock_spin(&sleepq_prof_lock);
 1340 }
 1341 
      /*
       * Sysctl handler that reports and changes prof_enabled.
       *
       * The current value is passed through sysctl_handle_int().  When the
       * request carries a new value that differs from prof_enabled, the
       * profiling data is reset first if the new value is exactly 1, and
       * prof_enabled is then set to 0 or 1 (!!v) under sleepq_prof_lock.
       * Returns the error from sysctl_handle_int(), or 0.
       *
       * NOTE(review): prof_enabled is read here without sleepq_prof_lock
       * held -- confirm that this is intentional.
       */
 1342 static int
 1343 enable_sleepq_prof(SYSCTL_HANDLER_ARGS)
 1344 {
 1345         int error, v;
 1346 
 1347         v = prof_enabled;
 1348         error = sysctl_handle_int(oidp, &v, v, req);
 1349         if (error)
 1350                 return (error);
              /* Read-only request: nothing to change. */
 1351         if (req->newptr == NULL)
 1352                 return (error);
 1353         if (v == prof_enabled)
 1354                 return (0);
 1355         if (v == 1)
 1356                 sleepq_prof_reset();
 1357         mtx_lock_spin(&sleepq_prof_lock);
 1358         prof_enabled = !!v;
 1359         mtx_unlock_spin(&sleepq_prof_lock);
 1360 
 1361         return (0);
 1362 }
 1363 
      /*
       * Sysctl handler that resets the profiling data.
       *
       * Always reports 0 when read.  When the request writes a non-zero
       * value, sleepq_prof_reset() is called; writing 0 does nothing.
       * Returns the error from sysctl_handle_int(), or 0.
       */
 1364 static int
 1365 reset_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
 1366 {
 1367         int error, v;
 1368 
 1369         v = 0;
 1370         error = sysctl_handle_int(oidp, &v, 0, req);
 1371         if (error)
 1372                 return (error);
 1373         if (req->newptr == NULL)
 1374                 return (error);
 1375         if (v == 0)
 1376                 return (0);
 1377         sleepq_prof_reset();
 1378 
 1379         return (0);
 1380 }
 1381 
      /*
       * Sysctl handler that dumps the profiling table as text.
       *
       * After wiring the caller's buffer, a header line and then one
       * "wmesg<TAB>count" line per record in sleepq_hash[] are written to a
       * sysctl-backed sbuf.  Profiling is switched off while the table is
       * walked and restored afterwards.  Returns the error from
       * sysctl_wire_old_buffer() or from sbuf_finish().
       *
       * NOTE(review): the initial read of prof_enabled and the table walk
       * itself happen without sleepq_prof_lock held, and the result of
       * sbuf_new_for_sysctl() is not checked -- confirm both are acceptable.
       */
 1382 static int
 1383 dump_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
 1384 {
 1385         struct sleepq_prof *sp;
 1386         struct sbuf *sb;
 1387         int enabled;
 1388         int error;
 1389         int i;
 1390 
 1391         error = sysctl_wire_old_buffer(req, 0);
 1392         if (error != 0)
 1393                 return (error);
 1394         sb = sbuf_new_for_sysctl(NULL, NULL, SLEEPQ_SBUFSIZE, req);
 1395         sbuf_printf(sb, "\nwmesg\tcount\n");
 1396         enabled = prof_enabled;
 1397         mtx_lock_spin(&sleepq_prof_lock);
 1398         prof_enabled = 0;
 1399         mtx_unlock_spin(&sleepq_prof_lock);
 1400         for (i = 0; i < SC_TABLESIZE; i++) {
 1401                 LIST_FOREACH(sp, &sleepq_hash[i], sp_link) {
 1402                         sbuf_printf(sb, "%s\t%ld\n",
 1403                             sp->sp_wmesg, sp->sp_count);
 1404                 }
 1405         }
 1406         mtx_lock_spin(&sleepq_prof_lock);
 1407         prof_enabled = enabled;
 1408         mtx_unlock_spin(&sleepq_prof_lock);
 1409 
 1410         error = sbuf_finish(sb);
 1411         sbuf_delete(sb);
 1412         return (error);
 1413 }
 1414 
      /*
       * Sysctl nodes under _debug_sleepq for the profiling code: "stats"
       * (read-only string, dump_sleepq_prof_stats), "reset" (read-write
       * integer, reset_sleepq_prof_stats) and "enable" (read-write integer,
       * enable_sleepq_prof).
       */
 1415 SYSCTL_PROC(_debug_sleepq, OID_AUTO, stats,
 1416     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, NULL, 0,
 1417     dump_sleepq_prof_stats, "A",
 1418     "Sleepqueue profiling statistics");
 1419 SYSCTL_PROC(_debug_sleepq, OID_AUTO, reset,
 1420     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
 1421     reset_sleepq_prof_stats, "I",
 1422     "Reset sleepqueue profiling statistics");
 1423 SYSCTL_PROC(_debug_sleepq, OID_AUTO, enable,
 1424     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
 1425     enable_sleepq_prof, "I",
 1426     "Enable sleepqueue profiling");
 1427 #endif
 1428 
 1429 #ifdef DDB
      /*
       * DDB "show sleepq" command: print the sleep queue identified by the
       * address argument.
       *
       * The address is first treated as a wait channel and searched for in
       * its own chain; failing that, every chain is scanned for a sleepqueue
       * structure located at that address.  For the queue found, the wait
       * channel, queue type, (under INVARIANTS) the associated interlock, and
       * the threads on each blocked list are printed together with the
       * list's recorded count.  Nothing is printed when no address is given.
       */
 1430 DB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
 1431 {
 1432         struct sleepqueue_chain *sc;
 1433         struct sleepqueue *sq;
 1434 #ifdef INVARIANTS
 1435         struct lock_object *lock;
 1436 #endif
 1437         struct thread *td;
 1438         void *wchan;
 1439         int i;
 1440 
 1441         if (!have_addr)
 1442                 return;
 1443 
 1444         /*
 1445          * First, see if there is an active sleep queue for the wait channel
 1446          * indicated by the address.
 1447          */
 1448         wchan = (void *)addr;
 1449         sc = SC_LOOKUP(wchan);
 1450         LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
 1451                 if (sq->sq_wchan == wchan)
 1452                         goto found;
 1453 
 1454         /*
 1455          * Second, see if there is an active sleep queue at the address
 1456          * indicated.
 1457          */
 1458         for (i = 0; i < SC_TABLESIZE; i++)
 1459                 LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) {
 1460                         if (sq == (struct sleepqueue *)addr)
 1461                                 goto found;
 1462                 }
 1463 
 1464         db_printf("Unable to locate a sleep queue via %p\n", (void *)addr);
 1465         return;
 1466 found:
 1467         db_printf("Wait channel: %p\n", sq->sq_wchan);
 1468         db_printf("Queue type: %d\n", sq->sq_type);
 1469 #ifdef INVARIANTS
 1470         if (sq->sq_lock) {
 1471                 lock = sq->sq_lock;
 1472                 db_printf("Associated Interlock: %p - (%s) %s\n", lock,
 1473                     LOCK_CLASS(lock)->lc_name, lock->lo_name);
 1474         }
 1475 #endif
 1476         db_printf("Blocked threads:\n");
 1477         for (i = 0; i < NR_SLEEPQS; i++) {
 1478                 db_printf("\nQueue[%d]:\n", i);
 1479                 if (TAILQ_EMPTY(&sq->sq_blocked[i]))
 1480                         db_printf("\tempty\n");
 1481                 else
 1482                         TAILQ_FOREACH(td, &sq->sq_blocked[i],
 1483                                       td_slpq) {
 1484                                 db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td,
 1485                                           td->td_tid, td->td_proc->p_pid,
 1486                                           td->td_name);
 1487                         }
                      /* The counter kept for this list, for cross-checking. */
 1488                 db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]);
 1489         }
 1490 }
 1491 
 1492 /* Alias 'show sleepqueue' to 'show sleepq'. */
 1493 DB_SHOW_ALIAS(sleepqueue, db_show_sleepqueue);
 1494 #endif

Cache object: 51513c7a7c5ffee38131d802eeb4cdd3


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.