FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_sleepqueue.c

    1 /*-
    2  * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 /*
    28  * Implementation of sleep queues used to hold the queue of threads blocked on
   29  * a wait channel.  Sleep queues are different from turnstiles in that wait
   30  * channels are not owned by anyone, so there is no priority propagation.
   31  * Sleep queues can also provide a timeout and can also be interrupted by
   32  * signals.  That said, there are several similarities between the turnstile
   33  * and sleep queue implementations.  (Note: turnstiles were implemented
   34  * first.)  For example, both use a hash table of the same size where each
   35  * bucket is referred to as a "chain" that contains both a spin lock and
   36  * a linked list of queues.  An individual queue is located by using a hash
   37  * to pick a chain, locking the chain, and then walking the chain searching
   38  * for the queue.  This means that a wait channel object does not need to
   39  * embed its queue head just as locks do not embed their turnstile queue
   40  * head.  Threads also carry around a sleep queue that they lend to the
   41  * wait channel when blocking.  Just as in turnstiles, the queue includes
   42  * a free list of the sleep queues of other threads blocked on the same
   43  * wait channel in the case of multiple waiters.
   44  *
    45  * Some additional functionality provided by sleep queues includes the
   46  * ability to set a timeout.  The timeout is managed using a per-thread
   47  * callout that resumes a thread if it is asleep.  A thread may also
   48  * catch signals while it is asleep (aka an interruptible sleep).  The
   49  * signal code uses sleepq_abort() to interrupt a sleeping thread.  Finally,
   50  * sleep queues also provide some extra assertions.  One is not allowed to
   51  * mix the sleep/wakeup and cv APIs for a given wait channel.  Also, one
   52  * must consistently use the same lock to synchronize with a wait channel,
   53  * though this check is currently only a warning for sleep/wakeup due to
   54  * pre-existing abuse of that API.  The same lock must also be held when
   55  * awakening threads, though that is currently only enforced for condition
   56  * variables.
   57  */
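/*
 * Usage sketch (editor's addition, not part of the original file): the
 * pattern below approximates how consumers such as _sleep() and the
 * condition variable code drive this API; "chan" and "lock" are
 * hypothetical, and interlock handling and error checks are elided.
 *
 *	Sleeping side:
 *		sleepq_lock(chan);
 *		sleepq_add(chan, lock, "example", SLEEPQ_SLEEP, 0);
 *		// drop the interlock "lock" here, if one was passed
 *		sleepq_wait(chan, 0);		// releases the chain lock
 *
 *	Waking side:
 *		sleepq_lock(chan);
 *		wakeup_swapper = sleepq_broadcast(chan, SLEEPQ_SLEEP, 0, 0);
 *		sleepq_release(chan);
 *		if (wakeup_swapper)
 *			kick_proc0();
 */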
   58 
   59 #include <sys/cdefs.h>
   60 __FBSDID("$FreeBSD: releng/11.1/sys/kern/subr_sleepqueue.c 316120 2017-03-29 01:21:48Z vangyzen $");
   61 
   62 #include "opt_sleepqueue_profiling.h"
   63 #include "opt_ddb.h"
   64 #include "opt_sched.h"
   65 #include "opt_stack.h"
   66 
   67 #include <sys/param.h>
   68 #include <sys/systm.h>
   69 #include <sys/lock.h>
   70 #include <sys/kernel.h>
   71 #include <sys/ktr.h>
   72 #include <sys/mutex.h>
   73 #include <sys/proc.h>
   74 #include <sys/sbuf.h>
   75 #include <sys/sched.h>
   76 #include <sys/sdt.h>
   77 #include <sys/signalvar.h>
   78 #include <sys/sleepqueue.h>
   79 #include <sys/stack.h>
   80 #include <sys/sysctl.h>
   81 #include <sys/time.h>
   82 
   83 #include <machine/atomic.h>
   84 
   85 #include <vm/uma.h>
   86 
   87 #ifdef DDB
   88 #include <ddb/ddb.h>
   89 #endif
   90 
   91 
   92 /*
   93  * Constants for the hash table of sleep queue chains.
   94  * SC_TABLESIZE must be a power of two for SC_MASK to work properly.
   95  */
   96 #define SC_TABLESIZE    256                     /* Must be power of 2. */
   97 #define SC_MASK         (SC_TABLESIZE - 1)
   98 #define SC_SHIFT        8
   99 #define SC_HASH(wc)     ((((uintptr_t)(wc) >> SC_SHIFT) ^ (uintptr_t)(wc)) & \
  100                             SC_MASK)
  101 #define SC_LOOKUP(wc)   &sleepq_chains[SC_HASH(wc)]
  102 #define NR_SLEEPQS      2
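/*
 * Worked example (editor's addition): for a hypothetical wait channel
 * address wc = 0xffffffff81234560, SC_HASH(wc) folds the pointer onto
 * itself and masks the result into the table:
 *
 *	(wc >> 8) ^ wc	-> low byte 0x45 ^ 0x60 = 0x25
 *	0x25 & SC_MASK	-> chain 37 of 256
 *
 * Because SC_TABLESIZE is a power of two, SC_MASK is an all-ones mask and
 * the final AND is a cheap modulo by the table size.
 */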
  103 /*
  104  * There are two different lists of sleep queues.  Both lists are connected
  105  * via the sq_hash entries.  The first list is the sleep queue chain list
  106  * that a sleep queue is on when it is attached to a wait channel.  The
  107  * second list is the free list hung off of a sleep queue that is attached
  108  * to a wait channel.
  109  *
  110  * Each sleep queue also contains the wait channel it is attached to, the
  111  * list of threads blocked on that wait channel, flags specific to the
  112  * wait channel, and the lock used to synchronize with a wait channel.
  113  * The flags are used to catch mismatches between the various consumers
  114  * of the sleep queue API (e.g. sleep/wakeup and condition variables).
  115  * The lock pointer is only used when invariants are enabled for various
  116  * debugging checks.
  117  *
  118  * Locking key:
  119  *  c - sleep queue chain lock
  120  */
  121 struct sleepqueue {
  122         TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS];    /* (c) Blocked threads. */
  123         u_int sq_blockedcnt[NR_SLEEPQS];        /* (c) N. of blocked threads. */
  124         LIST_ENTRY(sleepqueue) sq_hash;         /* (c) Chain and free list. */
  125         LIST_HEAD(, sleepqueue) sq_free;        /* (c) Free queues. */
  126         void    *sq_wchan;                      /* (c) Wait channel. */
  127         int     sq_type;                        /* (c) Queue type. */
  128 #ifdef INVARIANTS
  129         struct lock_object *sq_lock;            /* (c) Associated lock. */
  130 #endif
  131 };
  132 
  133 struct sleepqueue_chain {
  134         LIST_HEAD(, sleepqueue) sc_queues;      /* List of sleep queues. */
  135         struct mtx sc_lock;                     /* Spin lock for this chain. */
  136 #ifdef SLEEPQUEUE_PROFILING
  137         u_int   sc_depth;                       /* Length of sc_queues. */
  138         u_int   sc_max_depth;                   /* Max length of sc_queues. */
  139 #endif
  140 };
  141 
  142 #ifdef SLEEPQUEUE_PROFILING
  143 u_int sleepq_max_depth;
  144 static SYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD, 0, "sleepq profiling");
  145 static SYSCTL_NODE(_debug_sleepq, OID_AUTO, chains, CTLFLAG_RD, 0,
  146     "sleepq chain stats");
  147 SYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth,
   148     0, "maximum depth achieved of a single chain");
  149 
  150 static void     sleepq_profile(const char *wmesg);
  151 static int      prof_enabled;
  152 #endif
  153 static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
  154 static uma_zone_t sleepq_zone;
  155 
  156 /*
  157  * Prototypes for non-exported routines.
  158  */
  159 static int      sleepq_catch_signals(void *wchan, int pri);
  160 static int      sleepq_check_signals(void);
  161 static int      sleepq_check_timeout(void);
  162 #ifdef INVARIANTS
  163 static void     sleepq_dtor(void *mem, int size, void *arg);
  164 #endif
  165 static int      sleepq_init(void *mem, int size, int flags);
  166 static int      sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
  167                     int pri);
  168 static void     sleepq_switch(void *wchan, int pri);
  169 static void     sleepq_timeout(void *arg);
  170 
  171 SDT_PROBE_DECLARE(sched, , , sleep);
  172 SDT_PROBE_DECLARE(sched, , , wakeup);
  173 
  174 /*
  175  * Initialize SLEEPQUEUE_PROFILING specific sysctl nodes.
  176  * Note that it must happen after sleepinit() has been fully executed, so
  177  * it must happen after SI_SUB_KMEM SYSINIT() subsystem setup.
  178  */
  179 #ifdef SLEEPQUEUE_PROFILING
  180 static void
  181 init_sleepqueue_profiling(void)
  182 {
  183         char chain_name[10];
  184         struct sysctl_oid *chain_oid;
  185         u_int i;
  186 
  187         for (i = 0; i < SC_TABLESIZE; i++) {
  188                 snprintf(chain_name, sizeof(chain_name), "%u", i);
  189                 chain_oid = SYSCTL_ADD_NODE(NULL,
  190                     SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO,
  191                     chain_name, CTLFLAG_RD, NULL, "sleepq chain stats");
  192                 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  193                     "depth", CTLFLAG_RD, &sleepq_chains[i].sc_depth, 0, NULL);
  194                 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  195                     "max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0,
  196                     NULL);
  197         }
  198 }
  199 
  200 SYSINIT(sleepqueue_profiling, SI_SUB_LOCK, SI_ORDER_ANY,
  201     init_sleepqueue_profiling, NULL);
  202 #endif
  203 
  204 /*
  205  * Early initialization of sleep queues that is called from the sleepinit()
  206  * SYSINIT.
  207  */
  208 void
  209 init_sleepqueues(void)
  210 {
  211         int i;
  212 
  213         for (i = 0; i < SC_TABLESIZE; i++) {
  214                 LIST_INIT(&sleepq_chains[i].sc_queues);
  215                 mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
  216                     MTX_SPIN | MTX_RECURSE);
  217         }
  218         sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
  219 #ifdef INVARIANTS
  220             NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
  221 #else
  222             NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
  223 #endif
  224 
  225         thread0.td_sleepqueue = sleepq_alloc();
  226 }
  227 
  228 /*
  229  * Get a sleep queue for a new thread.
  230  */
  231 struct sleepqueue *
  232 sleepq_alloc(void)
  233 {
  234 
  235         return (uma_zalloc(sleepq_zone, M_WAITOK));
  236 }
  237 
  238 /*
  239  * Free a sleep queue when a thread is destroyed.
  240  */
  241 void
  242 sleepq_free(struct sleepqueue *sq)
  243 {
  244 
  245         uma_zfree(sleepq_zone, sq);
  246 }
  247 
  248 /*
  249  * Lock the sleep queue chain associated with the specified wait channel.
  250  */
  251 void
  252 sleepq_lock(void *wchan)
  253 {
  254         struct sleepqueue_chain *sc;
  255 
  256         sc = SC_LOOKUP(wchan);
  257         mtx_lock_spin(&sc->sc_lock);
  258 }
  259 
   260 /*
   261  * Look up the sleep queue associated with a given wait channel in the hash
   262  * table.  The associated sleep queue chain must already be locked.  If no
   263  * queue is found in the table, NULL is returned.
   264  */
  265 struct sleepqueue *
  266 sleepq_lookup(void *wchan)
  267 {
  268         struct sleepqueue_chain *sc;
  269         struct sleepqueue *sq;
  270 
  271         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  272         sc = SC_LOOKUP(wchan);
  273         mtx_assert(&sc->sc_lock, MA_OWNED);
  274         LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
  275                 if (sq->sq_wchan == wchan)
  276                         return (sq);
  277         return (NULL);
  278 }
  279 
  280 /*
  281  * Unlock the sleep queue chain associated with a given wait channel.
  282  */
  283 void
  284 sleepq_release(void *wchan)
  285 {
  286         struct sleepqueue_chain *sc;
  287 
  288         sc = SC_LOOKUP(wchan);
  289         mtx_unlock_spin(&sc->sc_lock);
  290 }
  291 
  292 /*
  293  * Places the current thread on the sleep queue for the specified wait
  294  * channel.  If INVARIANTS is enabled, then it associates the passed in
  295  * lock with the sleepq to make sure it is held when that sleep queue is
  296  * woken up.
  297  */
  298 void
  299 sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
  300     int queue)
  301 {
  302         struct sleepqueue_chain *sc;
  303         struct sleepqueue *sq;
  304         struct thread *td;
  305 
  306         td = curthread;
  307         sc = SC_LOOKUP(wchan);
  308         mtx_assert(&sc->sc_lock, MA_OWNED);
  309         MPASS(td->td_sleepqueue != NULL);
  310         MPASS(wchan != NULL);
  311         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  312 
  313         /* If this thread is not allowed to sleep, die a horrible death. */
  314         KASSERT(td->td_no_sleeping == 0,
  315             ("%s: td %p to sleep on wchan %p with sleeping prohibited",
  316             __func__, td, wchan));
  317 
  318         /* Look up the sleep queue associated with the wait channel 'wchan'. */
  319         sq = sleepq_lookup(wchan);
  320 
  321         /*
  322          * If the wait channel does not already have a sleep queue, use
  323          * this thread's sleep queue.  Otherwise, insert the current thread
  324          * into the sleep queue already in use by this wait channel.
  325          */
  326         if (sq == NULL) {
  327 #ifdef INVARIANTS
  328                 int i;
  329 
  330                 sq = td->td_sleepqueue;
  331                 for (i = 0; i < NR_SLEEPQS; i++) {
  332                         KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
  333                             ("thread's sleep queue %d is not empty", i));
  334                         KASSERT(sq->sq_blockedcnt[i] == 0,
  335                             ("thread's sleep queue %d count mismatches", i));
  336                 }
  337                 KASSERT(LIST_EMPTY(&sq->sq_free),
  338                     ("thread's sleep queue has a non-empty free list"));
  339                 KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
  340                 sq->sq_lock = lock;
  341 #endif
  342 #ifdef SLEEPQUEUE_PROFILING
  343                 sc->sc_depth++;
  344                 if (sc->sc_depth > sc->sc_max_depth) {
  345                         sc->sc_max_depth = sc->sc_depth;
  346                         if (sc->sc_max_depth > sleepq_max_depth)
  347                                 sleepq_max_depth = sc->sc_max_depth;
  348                 }
  349 #endif
  350                 sq = td->td_sleepqueue;
  351                 LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
  352                 sq->sq_wchan = wchan;
  353                 sq->sq_type = flags & SLEEPQ_TYPE;
  354         } else {
  355                 MPASS(wchan == sq->sq_wchan);
  356                 MPASS(lock == sq->sq_lock);
  357                 MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
  358                 LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
  359         }
  360         thread_lock(td);
  361         TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
  362         sq->sq_blockedcnt[queue]++;
  363         td->td_sleepqueue = NULL;
  364         td->td_sqqueue = queue;
  365         td->td_wchan = wchan;
  366         td->td_wmesg = wmesg;
  367         if (flags & SLEEPQ_INTERRUPTIBLE) {
  368                 td->td_flags |= TDF_SINTR;
  369                 td->td_flags &= ~TDF_SLEEPABORT;
  370         }
  371         thread_unlock(td);
  372 }
  373 
  374 /*
  375  * Sets a timeout that will remove the current thread from the specified
  376  * sleep queue after timo ticks if the thread has not already been awakened.
  377  */
  378 void
  379 sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
  380     int flags)
  381 {
  382         struct sleepqueue_chain *sc;
  383         struct thread *td;
  384         sbintime_t pr1;
  385 
  386         td = curthread;
  387         sc = SC_LOOKUP(wchan);
  388         mtx_assert(&sc->sc_lock, MA_OWNED);
  389         MPASS(TD_ON_SLEEPQ(td));
  390         MPASS(td->td_sleepqueue == NULL);
  391         MPASS(wchan != NULL);
  392         if (cold && td == &thread0)
  393                 panic("timed sleep before timers are working");
  394         KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx",
  395             td->td_tid, td, (uintmax_t)td->td_sleeptimo));
  396         thread_lock(td);
  397         callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1);
  398         thread_unlock(td);
  399         callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1,
  400             sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC |
  401             C_DIRECT_EXEC);
  402 }
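/*
 * Usage sketch (editor's addition, not part of the original file): a timed
 * sleep arms the timeout between sleepq_add() and the wait call, roughly as
 * _sleep() does.  "chan" is a hypothetical wait channel and "sbt" a
 * relative sbintime_t timeout; precision and callout flags are left at 0.
 *
 *	sleepq_lock(chan);
 *	sleepq_add(chan, NULL, "example", SLEEPQ_SLEEP, 0);
 *	sleepq_set_timeout_sbt(chan, sbt, 0, 0);
 *	error = sleepq_timedwait(chan, 0);	// EWOULDBLOCK on timeout
 */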
  403 
  404 /*
  405  * Return the number of actual sleepers for the specified queue.
  406  */
  407 u_int
  408 sleepq_sleepcnt(void *wchan, int queue)
  409 {
  410         struct sleepqueue *sq;
  411 
  412         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  413         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  414         sq = sleepq_lookup(wchan);
  415         if (sq == NULL)
  416                 return (0);
  417         return (sq->sq_blockedcnt[queue]);
  418 }
  419 
  420 /*
  421  * Marks the pending sleep of the current thread as interruptible and
  422  * makes an initial check for pending signals before putting a thread
  423  * to sleep. Enters and exits with the thread lock held.  Thread lock
  424  * may have transitioned from the sleepq lock to a run lock.
  425  */
  426 static int
  427 sleepq_catch_signals(void *wchan, int pri)
  428 {
  429         struct sleepqueue_chain *sc;
  430         struct sleepqueue *sq;
  431         struct thread *td;
  432         struct proc *p;
  433         struct sigacts *ps;
  434         int sig, ret;
  435 
  436         ret = 0;
  437         td = curthread;
  438         p = curproc;
  439         sc = SC_LOOKUP(wchan);
  440         mtx_assert(&sc->sc_lock, MA_OWNED);
  441         MPASS(wchan != NULL);
  442         if ((td->td_pflags & TDP_WAKEUP) != 0) {
  443                 td->td_pflags &= ~TDP_WAKEUP;
  444                 ret = EINTR;
  445                 thread_lock(td);
  446                 goto out;
  447         }
  448 
  449         /*
  450          * See if there are any pending signals or suspension requests for this
  451          * thread.  If not, we can switch immediately.
  452          */
  453         thread_lock(td);
  454         if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) != 0) {
  455                 thread_unlock(td);
  456                 mtx_unlock_spin(&sc->sc_lock);
  457                 CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)",
  458                         (void *)td, (long)p->p_pid, td->td_name);
  459                 PROC_LOCK(p);
  460                 /*
  461                  * Check for suspension first. Checking for signals and then
  462                  * suspending could result in a missed signal, since a signal
  463                  * can be delivered while this thread is suspended.
  464                  */
  465                 if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) {
  466                         ret = thread_suspend_check(1);
  467                         MPASS(ret == 0 || ret == EINTR || ret == ERESTART);
  468                         if (ret != 0) {
  469                                 PROC_UNLOCK(p);
  470                                 mtx_lock_spin(&sc->sc_lock);
  471                                 thread_lock(td);
  472                                 goto out;
  473                         }
  474                 }
  475                 if ((td->td_flags & TDF_NEEDSIGCHK) != 0) {
  476                         ps = p->p_sigacts;
  477                         mtx_lock(&ps->ps_mtx);
  478                         sig = cursig(td);
  479                         if (sig == -1) {
  480                                 mtx_unlock(&ps->ps_mtx);
  481                                 KASSERT((td->td_flags & TDF_SBDRY) != 0,
  482                                     ("lost TDF_SBDRY"));
  483                                 KASSERT(TD_SBDRY_INTR(td),
   484                                     ("lost TDF_SERESTART or TDF_SEINTR"));
  485                                 KASSERT((td->td_flags &
  486                                     (TDF_SEINTR | TDF_SERESTART)) !=
  487                                     (TDF_SEINTR | TDF_SERESTART),
  488                                     ("both TDF_SEINTR and TDF_SERESTART"));
  489                                 ret = TD_SBDRY_ERRNO(td);
  490                         } else if (sig != 0) {
  491                                 ret = SIGISMEMBER(ps->ps_sigintr, sig) ?
  492                                     EINTR : ERESTART;
  493                                 mtx_unlock(&ps->ps_mtx);
  494                         } else {
  495                                 mtx_unlock(&ps->ps_mtx);
  496                         }
  497                 }
  498                 /*
  499                  * Lock the per-process spinlock prior to dropping the PROC_LOCK
  500                  * to avoid a signal delivery race.  PROC_LOCK, PROC_SLOCK, and
  501                  * thread_lock() are currently held in tdsendsignal().
  502                  */
  503                 PROC_SLOCK(p);
  504                 mtx_lock_spin(&sc->sc_lock);
  505                 PROC_UNLOCK(p);
  506                 thread_lock(td);
  507                 PROC_SUNLOCK(p);
  508         }
  509         if (ret == 0) {
  510                 sleepq_switch(wchan, pri);
  511                 return (0);
  512         }
  513 out:
  514         /*
  515          * There were pending signals and this thread is still
   516          * on the sleep queue, so remove it from the sleep queue.
  517          */
  518         if (TD_ON_SLEEPQ(td)) {
  519                 sq = sleepq_lookup(wchan);
  520                 if (sleepq_resume_thread(sq, td, 0)) {
  521 #ifdef INVARIANTS
  522                         /*
  523                          * This thread hasn't gone to sleep yet, so it
  524                          * should not be swapped out.
  525                          */
  526                         panic("not waking up swapper");
  527 #endif
  528                 }
  529         }
  530         mtx_unlock_spin(&sc->sc_lock);
  531         MPASS(td->td_lock != &sc->sc_lock);
  532         return (ret);
  533 }
  534 
  535 /*
  536  * Switches to another thread if we are still asleep on a sleep queue.
  537  * Returns with thread lock.
  538  */
  539 static void
  540 sleepq_switch(void *wchan, int pri)
  541 {
  542         struct sleepqueue_chain *sc;
  543         struct sleepqueue *sq;
  544         struct thread *td;
  545         bool rtc_changed;
  546 
  547         td = curthread;
  548         sc = SC_LOOKUP(wchan);
  549         mtx_assert(&sc->sc_lock, MA_OWNED);
  550         THREAD_LOCK_ASSERT(td, MA_OWNED);
  551 
  552         /*
  553          * If we have a sleep queue, then we've already been woken up, so
  554          * just return.
  555          */
  556         if (td->td_sleepqueue != NULL) {
  557                 mtx_unlock_spin(&sc->sc_lock);
  558                 return;
  559         }
  560 
  561         /*
  562          * If TDF_TIMEOUT is set, then our sleep has been timed out
  563          * already but we are still on the sleep queue, so dequeue the
  564          * thread and return.
  565          *
  566          * Do the same if the real-time clock has been adjusted since this
  567          * thread calculated its timeout based on that clock.  This handles
  568          * the following race:
  569          * - The Ts thread needs to sleep until an absolute real-clock time.
  570          *   It copies the global rtc_generation into curthread->td_rtcgen,
  571          *   reads the RTC, and calculates a sleep duration based on that time.
  572          *   See umtxq_sleep() for an example.
  573          * - The Tc thread adjusts the RTC, bumps rtc_generation, and wakes
  574          *   threads that are sleeping until an absolute real-clock time.
  575          *   See tc_setclock() and the POSIX specification of clock_settime().
  576          * - Ts reaches the code below.  It holds the sleepqueue chain lock,
  577          *   so Tc has finished waking, so this thread must test td_rtcgen.
  578          * (The declaration of td_rtcgen refers to this comment.)
  579          */
  580         rtc_changed = td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation;
  581         if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) {
  582                 if (rtc_changed) {
  583                         td->td_rtcgen = 0;
  584                 }
  585                 MPASS(TD_ON_SLEEPQ(td));
  586                 sq = sleepq_lookup(wchan);
  587                 if (sleepq_resume_thread(sq, td, 0)) {
  588 #ifdef INVARIANTS
  589                         /*
  590                          * This thread hasn't gone to sleep yet, so it
  591                          * should not be swapped out.
  592                          */
  593                         panic("not waking up swapper");
  594 #endif
  595                 }
  596                 mtx_unlock_spin(&sc->sc_lock);
  597                 return;
  598         }
  599 #ifdef SLEEPQUEUE_PROFILING
  600         if (prof_enabled)
  601                 sleepq_profile(td->td_wmesg);
  602 #endif
  603         MPASS(td->td_sleepqueue == NULL);
  604         sched_sleep(td, pri);
  605         thread_lock_set(td, &sc->sc_lock);
  606         SDT_PROBE0(sched, , , sleep);
  607         TD_SET_SLEEPING(td);
  608         mi_switch(SW_VOL | SWT_SLEEPQ, NULL);
  609         KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
  610         CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
  611             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
  612 }
  613 
  614 /*
  615  * Check to see if we timed out.
  616  */
  617 static int
  618 sleepq_check_timeout(void)
  619 {
  620         struct thread *td;
  621         int res;
  622 
  623         td = curthread;
  624         THREAD_LOCK_ASSERT(td, MA_OWNED);
  625 
  626         /*
  627          * If TDF_TIMEOUT is set, we timed out.  But recheck
  628          * td_sleeptimo anyway.
  629          */
  630         res = 0;
  631         if (td->td_sleeptimo != 0) {
  632                 if (td->td_sleeptimo <= sbinuptime())
  633                         res = EWOULDBLOCK;
  634                 td->td_sleeptimo = 0;
  635         }
  636         if (td->td_flags & TDF_TIMEOUT)
  637                 td->td_flags &= ~TDF_TIMEOUT;
  638         else
  639                 /*
  640                  * We ignore the situation where timeout subsystem was
  641                  * unable to stop our callout.  The struct thread is
  642                  * type-stable, the callout will use the correct
  643                  * memory when running.  The checks of the
  644                  * td_sleeptimo value in this function and in
  645                  * sleepq_timeout() ensure that the thread does not
  646                  * get spurious wakeups, even if the callout was reset
  647                  * or thread reused.
  648                  */
  649                 callout_stop(&td->td_slpcallout);
  650         return (res);
  651 }
  652 
  653 /*
  654  * Check to see if we were awoken by a signal.
  655  */
  656 static int
  657 sleepq_check_signals(void)
  658 {
  659         struct thread *td;
  660 
  661         td = curthread;
  662         THREAD_LOCK_ASSERT(td, MA_OWNED);
  663 
  664         /* We are no longer in an interruptible sleep. */
  665         if (td->td_flags & TDF_SINTR)
  666                 td->td_flags &= ~TDF_SINTR;
  667 
  668         if (td->td_flags & TDF_SLEEPABORT) {
  669                 td->td_flags &= ~TDF_SLEEPABORT;
  670                 return (td->td_intrval);
  671         }
  672 
  673         return (0);
  674 }
  675 
  676 /*
  677  * Block the current thread until it is awakened from its sleep queue.
  678  */
  679 void
  680 sleepq_wait(void *wchan, int pri)
  681 {
  682         struct thread *td;
  683 
  684         td = curthread;
  685         MPASS(!(td->td_flags & TDF_SINTR));
  686         thread_lock(td);
  687         sleepq_switch(wchan, pri);
  688         thread_unlock(td);
  689 }
  690 
  691 /*
  692  * Block the current thread until it is awakened from its sleep queue
  693  * or it is interrupted by a signal.
  694  */
  695 int
  696 sleepq_wait_sig(void *wchan, int pri)
  697 {
  698         int rcatch;
  699         int rval;
  700 
  701         rcatch = sleepq_catch_signals(wchan, pri);
  702         rval = sleepq_check_signals();
  703         thread_unlock(curthread);
  704         if (rcatch)
  705                 return (rcatch);
  706         return (rval);
  707 }
  708 
  709 /*
  710  * Block the current thread until it is awakened from its sleep queue
  711  * or it times out while waiting.
  712  */
  713 int
  714 sleepq_timedwait(void *wchan, int pri)
  715 {
  716         struct thread *td;
  717         int rval;
  718 
  719         td = curthread;
  720         MPASS(!(td->td_flags & TDF_SINTR));
  721         thread_lock(td);
  722         sleepq_switch(wchan, pri);
  723         rval = sleepq_check_timeout();
  724         thread_unlock(td);
  725 
  726         return (rval);
  727 }
  728 
  729 /*
  730  * Block the current thread until it is awakened from its sleep queue,
  731  * it is interrupted by a signal, or it times out waiting to be awakened.
  732  */
  733 int
  734 sleepq_timedwait_sig(void *wchan, int pri)
  735 {
  736         int rcatch, rvalt, rvals;
  737 
  738         rcatch = sleepq_catch_signals(wchan, pri);
  739         rvalt = sleepq_check_timeout();
  740         rvals = sleepq_check_signals();
  741         thread_unlock(curthread);
  742         if (rcatch)
  743                 return (rcatch);
  744         if (rvals)
  745                 return (rvals);
  746         return (rvalt);
  747 }
  748 
  749 /*
   750  * Returns the type of sleep queue given a wait channel.
  751  */
  752 int
  753 sleepq_type(void *wchan)
  754 {
  755         struct sleepqueue *sq;
  756         int type;
  757 
  758         MPASS(wchan != NULL);
  759 
  760         sleepq_lock(wchan);
  761         sq = sleepq_lookup(wchan);
  762         if (sq == NULL) {
  763                 sleepq_release(wchan);
  764                 return (-1);
  765         }
  766         type = sq->sq_type;
  767         sleepq_release(wchan);
  768         return (type);
  769 }
  770 
  771 /*
  772  * Removes a thread from a sleep queue and makes it
  773  * runnable.
  774  */
  775 static int
  776 sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri)
  777 {
  778         struct sleepqueue_chain *sc;
  779 
  780         MPASS(td != NULL);
  781         MPASS(sq->sq_wchan != NULL);
  782         MPASS(td->td_wchan == sq->sq_wchan);
  783         MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0);
  784         THREAD_LOCK_ASSERT(td, MA_OWNED);
  785         sc = SC_LOOKUP(sq->sq_wchan);
  786         mtx_assert(&sc->sc_lock, MA_OWNED);
  787 
  788         SDT_PROBE2(sched, , , wakeup, td, td->td_proc);
  789 
  790         /* Remove the thread from the queue. */
  791         sq->sq_blockedcnt[td->td_sqqueue]--;
  792         TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
  793 
  794         /*
  795          * Get a sleep queue for this thread.  If this is the last waiter,
  796          * use the queue itself and take it out of the chain, otherwise,
  797          * remove a queue from the free list.
  798          */
  799         if (LIST_EMPTY(&sq->sq_free)) {
  800                 td->td_sleepqueue = sq;
  801 #ifdef INVARIANTS
  802                 sq->sq_wchan = NULL;
  803 #endif
  804 #ifdef SLEEPQUEUE_PROFILING
  805                 sc->sc_depth--;
  806 #endif
  807         } else
  808                 td->td_sleepqueue = LIST_FIRST(&sq->sq_free);
  809         LIST_REMOVE(td->td_sleepqueue, sq_hash);
  810 
  811         td->td_wmesg = NULL;
  812         td->td_wchan = NULL;
  813         td->td_flags &= ~TDF_SINTR;
  814 
  815         CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)",
  816             (void *)td, (long)td->td_proc->p_pid, td->td_name);
  817 
  818         /* Adjust priority if requested. */
  819         MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
  820         if (pri != 0 && td->td_priority > pri &&
  821             PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
  822                 sched_prio(td, pri);
  823 
  824         /*
  825          * Note that thread td might not be sleeping if it is running
  826          * sleepq_catch_signals() on another CPU or is blocked on its
  827          * proc lock to check signals.  There's no need to mark the
  828          * thread runnable in that case.
  829          */
  830         if (TD_IS_SLEEPING(td)) {
  831                 TD_CLR_SLEEPING(td);
  832                 return (setrunnable(td));
  833         }
  834         return (0);
  835 }
  836 
  837 #ifdef INVARIANTS
  838 /*
  839  * UMA zone item deallocator.
  840  */
  841 static void
  842 sleepq_dtor(void *mem, int size, void *arg)
  843 {
  844         struct sleepqueue *sq;
  845         int i;
  846 
  847         sq = mem;
  848         for (i = 0; i < NR_SLEEPQS; i++) {
  849                 MPASS(TAILQ_EMPTY(&sq->sq_blocked[i]));
  850                 MPASS(sq->sq_blockedcnt[i] == 0);
  851         }
  852 }
  853 #endif
  854 
  855 /*
  856  * UMA zone item initializer.
  857  */
  858 static int
  859 sleepq_init(void *mem, int size, int flags)
  860 {
  861         struct sleepqueue *sq;
  862         int i;
  863 
  864         bzero(mem, size);
  865         sq = mem;
  866         for (i = 0; i < NR_SLEEPQS; i++) {
  867                 TAILQ_INIT(&sq->sq_blocked[i]);
  868                 sq->sq_blockedcnt[i] = 0;
  869         }
  870         LIST_INIT(&sq->sq_free);
  871         return (0);
  872 }
  873 
  874 /*
  875  * Find the highest priority thread sleeping on a wait channel and resume it.
  876  */
  877 int
  878 sleepq_signal(void *wchan, int flags, int pri, int queue)
  879 {
  880         struct sleepqueue *sq;
  881         struct thread *td, *besttd;
  882         int wakeup_swapper;
  883 
  884         CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
  885         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  886         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  887         sq = sleepq_lookup(wchan);
  888         if (sq == NULL)
  889                 return (0);
  890         KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
  891             ("%s: mismatch between sleep/wakeup and cv_*", __func__));
  892 
  893         /*
  894          * Find the highest priority thread on the queue.  If there is a
  895          * tie, use the thread that first appears in the queue as it has
  896          * been sleeping the longest since threads are always added to
  897          * the tail of sleep queues.
  898          */
  899         besttd = NULL;
  900         TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) {
  901                 if (besttd == NULL || td->td_priority < besttd->td_priority)
  902                         besttd = td;
  903         }
  904         MPASS(besttd != NULL);
  905         thread_lock(besttd);
  906         wakeup_swapper = sleepq_resume_thread(sq, besttd, pri);
  907         thread_unlock(besttd);
  908         return (wakeup_swapper);
  909 }
  910 
  911 static bool
  912 match_any(struct thread *td __unused)
  913 {
  914 
  915         return (true);
  916 }
  917 
  918 /*
  919  * Resume all threads sleeping on a specified wait channel.
  920  */
  921 int
  922 sleepq_broadcast(void *wchan, int flags, int pri, int queue)
  923 {
  924         struct sleepqueue *sq;
  925 
  926         CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
  927         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  928         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  929         sq = sleepq_lookup(wchan);
  930         if (sq == NULL)
  931                 return (0);
  932         KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
  933             ("%s: mismatch between sleep/wakeup and cv_*", __func__));
  934 
  935         return (sleepq_remove_matching(sq, queue, match_any, pri));
  936 }
  937 
  938 /*
  939  * Resume threads on the sleep queue that match the given predicate.
  940  */
  941 int
  942 sleepq_remove_matching(struct sleepqueue *sq, int queue,
  943     bool (*matches)(struct thread *), int pri)
  944 {
  945         struct thread *td, *tdn;
  946         int wakeup_swapper;
  947 
  948         /*
  949          * The last thread will be given ownership of sq and may
  950          * re-enqueue itself before sleepq_resume_thread() returns,
  951          * so we must cache the "next" queue item at the beginning
  952          * of the final iteration.
  953          */
  954         wakeup_swapper = 0;
  955         TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
  956                 thread_lock(td);
  957                 if (matches(td))
  958                         wakeup_swapper |= sleepq_resume_thread(sq, td, pri);
  959                 thread_unlock(td);
  960         }
  961 
  962         return (wakeup_swapper);
  963 }
  964 
  965 /*
  966  * Time sleeping threads out.  When the timeout expires, the thread is
  967  * removed from the sleep queue and made runnable if it is still asleep.
  968  */
  969 static void
  970 sleepq_timeout(void *arg)
  971 {
  972         struct sleepqueue_chain *sc;
  973         struct sleepqueue *sq;
  974         struct thread *td;
  975         void *wchan;
  976         int wakeup_swapper;
  977 
  978         td = arg;
  979         wakeup_swapper = 0;
  980         CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
  981             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
  982 
  983         thread_lock(td);
  984 
  985         if (td->td_sleeptimo > sbinuptime() || td->td_sleeptimo == 0) {
  986                 /*
  987                  * The thread does not want a timeout (yet).
  988                  */
  989         } else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
  990                 /*
  991                  * See if the thread is asleep and get the wait
  992                  * channel if it is.
  993                  */
  994                 wchan = td->td_wchan;
  995                 sc = SC_LOOKUP(wchan);
  996                 THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
  997                 sq = sleepq_lookup(wchan);
  998                 MPASS(sq != NULL);
  999                 td->td_flags |= TDF_TIMEOUT;
 1000                 wakeup_swapper = sleepq_resume_thread(sq, td, 0);
 1001         } else if (TD_ON_SLEEPQ(td)) {
 1002                 /*
 1003                  * If the thread is on the SLEEPQ but isn't sleeping
 1004                  * yet, it can either be on another CPU in between
 1005                  * sleepq_add() and one of the sleepq_*wait*()
 1006                  * routines or it can be in sleepq_catch_signals().
 1007                  */
 1008                 td->td_flags |= TDF_TIMEOUT;
 1009         }
 1010 
 1011         thread_unlock(td);
 1012         if (wakeup_swapper)
 1013                 kick_proc0();
 1014 }
 1015 
 1016 /*
 1017  * Resumes a specific thread from the sleep queue associated with a specific
 1018  * wait channel if it is on that queue.
 1019  */
 1020 void
 1021 sleepq_remove(struct thread *td, void *wchan)
 1022 {
 1023         struct sleepqueue *sq;
 1024         int wakeup_swapper;
 1025 
 1026         /*
 1027          * Look up the sleep queue for this wait channel, then re-check
  1028          * that the thread is asleep on that channel; if it is not, then
 1029          * bail.
 1030          */
 1031         MPASS(wchan != NULL);
 1032         sleepq_lock(wchan);
 1033         sq = sleepq_lookup(wchan);
 1034         /*
 1035          * We can not lock the thread here as it may be sleeping on a
 1036          * different sleepq.  However, holding the sleepq lock for this
 1037          * wchan can guarantee that we do not miss a wakeup for this
 1038          * channel.  The asserts below will catch any false positives.
 1039          */
 1040         if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) {
 1041                 sleepq_release(wchan);
 1042                 return;
 1043         }
 1044         /* Thread is asleep on sleep queue sq, so wake it up. */
 1045         thread_lock(td);
 1046         MPASS(sq != NULL);
 1047         MPASS(td->td_wchan == wchan);
 1048         wakeup_swapper = sleepq_resume_thread(sq, td, 0);
 1049         thread_unlock(td);
 1050         sleepq_release(wchan);
 1051         if (wakeup_swapper)
 1052                 kick_proc0();
 1053 }
 1054 
 1055 /*
 1056  * Abort a thread as if an interrupt had occurred.  Only abort
 1057  * interruptible waits (unfortunately it isn't safe to abort others).
 1058  */
 1059 int
 1060 sleepq_abort(struct thread *td, int intrval)
 1061 {
 1062         struct sleepqueue *sq;
 1063         void *wchan;
 1064 
 1065         THREAD_LOCK_ASSERT(td, MA_OWNED);
 1066         MPASS(TD_ON_SLEEPQ(td));
 1067         MPASS(td->td_flags & TDF_SINTR);
 1068         MPASS(intrval == EINTR || intrval == ERESTART);
 1069 
 1070         /*
 1071          * If the TDF_TIMEOUT flag is set, just leave. A
 1072          * timeout is scheduled anyhow.
 1073          */
 1074         if (td->td_flags & TDF_TIMEOUT)
 1075                 return (0);
 1076 
 1077         CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)",
 1078             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
 1079         td->td_intrval = intrval;
 1080         td->td_flags |= TDF_SLEEPABORT;
 1081         /*
 1082          * If the thread has not slept yet it will find the signal in
 1083          * sleepq_catch_signals() and call sleepq_resume_thread.  Otherwise
 1084          * we have to do it here.
 1085          */
 1086         if (!TD_IS_SLEEPING(td))
 1087                 return (0);
 1088         wchan = td->td_wchan;
 1089         MPASS(wchan != NULL);
 1090         sq = sleepq_lookup(wchan);
 1091         MPASS(sq != NULL);
 1092 
 1093         /* Thread is asleep on sleep queue sq, so wake it up. */
 1094         return (sleepq_resume_thread(sq, td, 0));
 1095 }
 1096 
 1097 void
 1098 sleepq_chains_remove_matching(bool (*matches)(struct thread *))
 1099 {
 1100         struct sleepqueue_chain *sc;
 1101         struct sleepqueue *sq;
 1102         int i, wakeup_swapper;
 1103 
 1104         wakeup_swapper = 0;
 1105         for (sc = &sleepq_chains[0]; sc < sleepq_chains + SC_TABLESIZE; ++sc) {
 1106                 if (LIST_EMPTY(&sc->sc_queues)) {
 1107                         continue;
 1108                 }
 1109                 mtx_lock_spin(&sc->sc_lock);
 1110                 LIST_FOREACH(sq, &sc->sc_queues, sq_hash) {
 1111                         for (i = 0; i < NR_SLEEPQS; ++i) {
 1112                                 wakeup_swapper |= sleepq_remove_matching(sq, i,
 1113                                     matches, 0);
 1114                         }
 1115                 }
 1116                 mtx_unlock_spin(&sc->sc_lock);
 1117         }
 1118         if (wakeup_swapper) {
 1119                 kick_proc0();
 1120         }
 1121 }
 1122 
 1123 /*
 1124  * Prints the stacks of all threads presently sleeping on wchan/queue to
 1125  * the sbuf sb.  Sets count_stacks_printed to the number of stacks actually
 1126  * printed.  Typically, this will equal the number of threads sleeping on the
 1127  * queue, but may be less if sb overflowed before all stacks were printed.
 1128  */
 1129 #ifdef STACK
 1130 int
 1131 sleepq_sbuf_print_stacks(struct sbuf *sb, void *wchan, int queue,
 1132     int *count_stacks_printed)
 1133 {
 1134         struct thread *td, *td_next;
 1135         struct sleepqueue *sq;
 1136         struct stack **st;
 1137         struct sbuf **td_infos;
 1138         int i, stack_idx, error, stacks_to_allocate;
 1139         bool finished, partial_print;
 1140 
 1141         error = 0;
 1142         finished = false;
 1143         partial_print = false;
 1144 
 1145         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
 1146         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
 1147 
 1148         stacks_to_allocate = 10;
 1149         for (i = 0; i < 3 && !finished ; i++) {
 1150                 /* We cannot malloc while holding the queue's spinlock, so
 1151                  * we do our mallocs now, and hope it is enough.  If it
 1152                  * isn't, we will free these, drop the lock, malloc more,
 1153                  * and try again, up to a point.  After that point we will
 1154                  * give up and report ENOMEM. We also cannot write to sb
 1155                  * during this time since the client may have set the
 1156                  * SBUF_AUTOEXTEND flag on their sbuf, which could cause a
 1157                  * malloc as we print to it.  So we defer actually printing
 1158                  * to sb until after we drop the spinlock.
 1159                  */
 1160 
 1161                 /* Where we will store the stacks. */
 1162                 st = malloc(sizeof(struct stack *) * stacks_to_allocate,
 1163                     M_TEMP, M_WAITOK);
 1164                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1165                     stack_idx++)
 1166                         st[stack_idx] = stack_create();
 1167 
 1168                 /* Where we will store the td name, tid, etc. */
 1169                 td_infos = malloc(sizeof(struct sbuf *) * stacks_to_allocate,
 1170                     M_TEMP, M_WAITOK);
 1171                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1172                     stack_idx++)
 1173                         td_infos[stack_idx] = sbuf_new(NULL, NULL,
 1174                             MAXCOMLEN + sizeof(struct thread *) * 2 + 40,
 1175                             SBUF_FIXEDLEN);
 1176 
 1177                 sleepq_lock(wchan);
 1178                 sq = sleepq_lookup(wchan);
 1179                 if (sq == NULL) {
 1180                         /* This sleepq does not exist; exit and return ENOENT. */
 1181                         error = ENOENT;
 1182                         finished = true;
 1183                         sleepq_release(wchan);
 1184                         goto loop_end;
 1185                 }
 1186 
 1187                 stack_idx = 0;
 1188                 /* Save thread info */
 1189                 TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq,
 1190                     td_next) {
 1191                         if (stack_idx >= stacks_to_allocate)
 1192                                 goto loop_end;
 1193 
 1194                         /* Note the td_lock is equal to the sleepq_lock here. */
 1195                         stack_save_td(st[stack_idx], td);
 1196 
 1197                         sbuf_printf(td_infos[stack_idx], "%d: %s %p",
 1198                             td->td_tid, td->td_name, td);
 1199 
 1200                         ++stack_idx;
 1201                 }
 1202 
 1203                 finished = true;
 1204                 sleepq_release(wchan);
 1205 
 1206                 /* Print the stacks */
 1207                 for (i = 0; i < stack_idx; i++) {
 1208                         sbuf_finish(td_infos[i]);
 1209                         sbuf_printf(sb, "--- thread %s: ---\n", sbuf_data(td_infos[i]));
 1210                         stack_sbuf_print(sb, st[i]);
 1211                         sbuf_printf(sb, "\n");
 1212 
 1213                         error = sbuf_error(sb);
 1214                         if (error == 0)
 1215                                 *count_stacks_printed = stack_idx;
 1216                 }
 1217 
 1218 loop_end:
 1219                 if (!finished)
 1220                         sleepq_release(wchan);
 1221                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1222                     stack_idx++)
 1223                         stack_destroy(st[stack_idx]);
 1224                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1225                     stack_idx++)
 1226                         sbuf_delete(td_infos[stack_idx]);
 1227                 free(st, M_TEMP);
 1228                 free(td_infos, M_TEMP);
 1229                 stacks_to_allocate *= 10;
 1230         }
 1231 
 1232         if (!finished && error == 0)
 1233                 error = ENOMEM;
 1234 
 1235         return (error);
 1236 }
 1237 #endif
 1238 
 1239 #ifdef SLEEPQUEUE_PROFILING
 1240 #define SLEEPQ_PROF_LOCATIONS   1024
 1241 #define SLEEPQ_SBUFSIZE         512
 1242 struct sleepq_prof {
 1243         LIST_ENTRY(sleepq_prof) sp_link;
 1244         const char      *sp_wmesg;
 1245         long            sp_count;
 1246 };
 1247 
 1248 LIST_HEAD(sqphead, sleepq_prof);
 1249 
 1250 struct sqphead sleepq_prof_free;
 1251 struct sqphead sleepq_hash[SC_TABLESIZE];
 1252 static struct sleepq_prof sleepq_profent[SLEEPQ_PROF_LOCATIONS];
 1253 static struct mtx sleepq_prof_lock;
 1254 MTX_SYSINIT(sleepq_prof_lock, &sleepq_prof_lock, "sleepq_prof", MTX_SPIN);
 1255 
 1256 static void
 1257 sleepq_profile(const char *wmesg)
 1258 {
 1259         struct sleepq_prof *sp;
 1260 
 1261         mtx_lock_spin(&sleepq_prof_lock);
 1262         if (prof_enabled == 0)
 1263                 goto unlock;
 1264         LIST_FOREACH(sp, &sleepq_hash[SC_HASH(wmesg)], sp_link)
 1265                 if (sp->sp_wmesg == wmesg)
 1266                         goto done;
 1267         sp = LIST_FIRST(&sleepq_prof_free);
 1268         if (sp == NULL)
 1269                 goto unlock;
 1270         sp->sp_wmesg = wmesg;
 1271         LIST_REMOVE(sp, sp_link);
 1272         LIST_INSERT_HEAD(&sleepq_hash[SC_HASH(wmesg)], sp, sp_link);
 1273 done:
 1274         sp->sp_count++;
 1275 unlock:
 1276         mtx_unlock_spin(&sleepq_prof_lock);
 1277         return;
 1278 }
 1279 
 1280 static void
 1281 sleepq_prof_reset(void)
 1282 {
 1283         struct sleepq_prof *sp;
 1284         int enabled;
 1285         int i;
 1286 
 1287         mtx_lock_spin(&sleepq_prof_lock);
 1288         enabled = prof_enabled;
 1289         prof_enabled = 0;
 1290         for (i = 0; i < SC_TABLESIZE; i++)
 1291                 LIST_INIT(&sleepq_hash[i]);
 1292         LIST_INIT(&sleepq_prof_free);
 1293         for (i = 0; i < SLEEPQ_PROF_LOCATIONS; i++) {
 1294                 sp = &sleepq_profent[i];
 1295                 sp->sp_wmesg = NULL;
 1296                 sp->sp_count = 0;
 1297                 LIST_INSERT_HEAD(&sleepq_prof_free, sp, sp_link);
 1298         }
 1299         prof_enabled = enabled;
 1300         mtx_unlock_spin(&sleepq_prof_lock);
 1301 }
 1302 
 1303 static int
 1304 enable_sleepq_prof(SYSCTL_HANDLER_ARGS)
 1305 {
 1306         int error, v;
 1307 
 1308         v = prof_enabled;
 1309         error = sysctl_handle_int(oidp, &v, v, req);
 1310         if (error)
 1311                 return (error);
 1312         if (req->newptr == NULL)
 1313                 return (error);
 1314         if (v == prof_enabled)
 1315                 return (0);
 1316         if (v == 1)
 1317                 sleepq_prof_reset();
 1318         mtx_lock_spin(&sleepq_prof_lock);
 1319         prof_enabled = !!v;
 1320         mtx_unlock_spin(&sleepq_prof_lock);
 1321 
 1322         return (0);
 1323 }
 1324 
 1325 static int
 1326 reset_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
 1327 {
 1328         int error, v;
 1329 
 1330         v = 0;
 1331         error = sysctl_handle_int(oidp, &v, 0, req);
 1332         if (error)
 1333                 return (error);
 1334         if (req->newptr == NULL)
 1335                 return (error);
 1336         if (v == 0)
 1337                 return (0);
 1338         sleepq_prof_reset();
 1339 
 1340         return (0);
 1341 }
 1342 
 1343 static int
 1344 dump_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
 1345 {
 1346         struct sleepq_prof *sp;
 1347         struct sbuf *sb;
 1348         int enabled;
 1349         int error;
 1350         int i;
 1351 
 1352         error = sysctl_wire_old_buffer(req, 0);
 1353         if (error != 0)
 1354                 return (error);
 1355         sb = sbuf_new_for_sysctl(NULL, NULL, SLEEPQ_SBUFSIZE, req);
 1356         sbuf_printf(sb, "\nwmesg\tcount\n");
 1357         enabled = prof_enabled;
 1358         mtx_lock_spin(&sleepq_prof_lock);
 1359         prof_enabled = 0;
 1360         mtx_unlock_spin(&sleepq_prof_lock);
 1361         for (i = 0; i < SC_TABLESIZE; i++) {
 1362                 LIST_FOREACH(sp, &sleepq_hash[i], sp_link) {
 1363                         sbuf_printf(sb, "%s\t%ld\n",
 1364                             sp->sp_wmesg, sp->sp_count);
 1365                 }
 1366         }
 1367         mtx_lock_spin(&sleepq_prof_lock);
 1368         prof_enabled = enabled;
 1369         mtx_unlock_spin(&sleepq_prof_lock);
 1370 
 1371         error = sbuf_finish(sb);
 1372         sbuf_delete(sb);
 1373         return (error);
 1374 }
 1375 
 1376 SYSCTL_PROC(_debug_sleepq, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
 1377     NULL, 0, dump_sleepq_prof_stats, "A", "Sleepqueue profiling statistics");
 1378 SYSCTL_PROC(_debug_sleepq, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW,
 1379     NULL, 0, reset_sleepq_prof_stats, "I",
 1380     "Reset sleepqueue profiling statistics");
 1381 SYSCTL_PROC(_debug_sleepq, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
 1382     NULL, 0, enable_sleepq_prof, "I", "Enable sleepqueue profiling");
 1383 #endif
 1384 
 1385 #ifdef DDB
 1386 DB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
 1387 {
 1388         struct sleepqueue_chain *sc;
 1389         struct sleepqueue *sq;
 1390 #ifdef INVARIANTS
 1391         struct lock_object *lock;
 1392 #endif
 1393         struct thread *td;
 1394         void *wchan;
 1395         int i;
 1396 
 1397         if (!have_addr)
 1398                 return;
 1399 
 1400         /*
 1401          * First, see if there is an active sleep queue for the wait channel
 1402          * indicated by the address.
 1403          */
 1404         wchan = (void *)addr;
 1405         sc = SC_LOOKUP(wchan);
 1406         LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
 1407                 if (sq->sq_wchan == wchan)
 1408                         goto found;
 1409 
 1410         /*
 1411          * Second, see if there is an active sleep queue at the address
 1412          * indicated.
 1413          */
 1414         for (i = 0; i < SC_TABLESIZE; i++)
 1415                 LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) {
 1416                         if (sq == (struct sleepqueue *)addr)
 1417                                 goto found;
 1418                 }
 1419 
 1420         db_printf("Unable to locate a sleep queue via %p\n", (void *)addr);
 1421         return;
 1422 found:
 1423         db_printf("Wait channel: %p\n", sq->sq_wchan);
 1424         db_printf("Queue type: %d\n", sq->sq_type);
 1425 #ifdef INVARIANTS
 1426         if (sq->sq_lock) {
 1427                 lock = sq->sq_lock;
 1428                 db_printf("Associated Interlock: %p - (%s) %s\n", lock,
 1429                     LOCK_CLASS(lock)->lc_name, lock->lo_name);
 1430         }
 1431 #endif
 1432         db_printf("Blocked threads:\n");
 1433         for (i = 0; i < NR_SLEEPQS; i++) {
 1434                 db_printf("\nQueue[%d]:\n", i);
 1435                 if (TAILQ_EMPTY(&sq->sq_blocked[i]))
 1436                         db_printf("\tempty\n");
 1437                 else
  1438                         TAILQ_FOREACH(td, &sq->sq_blocked[i],
 1439                                       td_slpq) {
 1440                                 db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td,
 1441                                           td->td_tid, td->td_proc->p_pid,
 1442                                           td->td_name);
 1443                         }
 1444                 db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]);
 1445         }
 1446 }
 1447 
 1448 /* Alias 'show sleepqueue' to 'show sleepq'. */
 1449 DB_SHOW_ALIAS(sleepqueue, db_show_sleepqueue);
 1450 #endif
