FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_sleepqueue.c


    1 /*-
    2  * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
    3  *
    4  * Redistribution and use in source and binary forms, with or without
    5  * modification, are permitted provided that the following conditions
    6  * are met:
    7  * 1. Redistributions of source code must retain the above copyright
    8  *    notice, this list of conditions and the following disclaimer.
    9  * 2. Redistributions in binary form must reproduce the above copyright
   10  *    notice, this list of conditions and the following disclaimer in the
   11  *    documentation and/or other materials provided with the distribution.
   12  *
   13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   23  * SUCH DAMAGE.
   24  */
   25 
   26 /*
    27  * Implementation of sleep queues used to hold the queue of threads blocked on
   28  * a wait channel.  Sleep queues are different from turnstiles in that wait
   29  * channels are not owned by anyone, so there is no priority propagation.
   30  * Sleep queues can also provide a timeout and can also be interrupted by
   31  * signals.  That said, there are several similarities between the turnstile
   32  * and sleep queue implementations.  (Note: turnstiles were implemented
   33  * first.)  For example, both use a hash table of the same size where each
   34  * bucket is referred to as a "chain" that contains both a spin lock and
   35  * a linked list of queues.  An individual queue is located by using a hash
   36  * to pick a chain, locking the chain, and then walking the chain searching
   37  * for the queue.  This means that a wait channel object does not need to
   38  * embed its queue head just as locks do not embed their turnstile queue
   39  * head.  Threads also carry around a sleep queue that they lend to the
   40  * wait channel when blocking.  Just as in turnstiles, the queue includes
   41  * a free list of the sleep queues of other threads blocked on the same
   42  * wait channel in the case of multiple waiters.
   43  *
    44  * Some additional functionality provided by sleep queues includes the
   45  * ability to set a timeout.  The timeout is managed using a per-thread
   46  * callout that resumes a thread if it is asleep.  A thread may also
   47  * catch signals while it is asleep (aka an interruptible sleep).  The
   48  * signal code uses sleepq_abort() to interrupt a sleeping thread.  Finally,
   49  * sleep queues also provide some extra assertions.  One is not allowed to
   50  * mix the sleep/wakeup and cv APIs for a given wait channel.  Also, one
   51  * must consistently use the same lock to synchronize with a wait channel,
   52  * though this check is currently only a warning for sleep/wakeup due to
   53  * pre-existing abuse of that API.  The same lock must also be held when
   54  * awakening threads, though that is currently only enforced for condition
   55  * variables.
   56  */
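
/*
 * Editor's note: the sketch below is illustrative and is not part of this
 * file.  It shows the typical blocking-side pattern built on the API
 * implemented here, roughly as the sleep/wakeup and condition variable
 * consumers use it; the function name and wait channel are hypothetical,
 * and the caller's own interlock handling is elided.  The waker side is
 * sketched near sleepq_signal() below.
 */
#if 0	/* illustration only, not compiled */
static void
example_block(void *wchan, struct lock_object *interlock)
{

	/* Lock the chain for this wait channel and queue curthread on it. */
	sleepq_lock(wchan);
	sleepq_add(wchan, interlock, "examp", SLEEPQ_SLEEP, 0);
	/*
	 * Block until another thread calls sleepq_signal() or
	 * sleepq_broadcast() on wchan; the chain lock is released on
	 * the caller's behalf before switching away.
	 */
	sleepq_wait(wchan, 0);
}
#endif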
   57 
   58 #include <sys/cdefs.h>
   59 __FBSDID("$FreeBSD$");
   60 
   61 #include "opt_sleepqueue_profiling.h"
   62 #include "opt_ddb.h"
   63 #include "opt_sched.h"
   64 #include "opt_stack.h"
   65 
   66 #include <sys/param.h>
   67 #include <sys/systm.h>
   68 #include <sys/lock.h>
   69 #include <sys/kernel.h>
   70 #include <sys/ktr.h>
   71 #include <sys/mutex.h>
   72 #include <sys/proc.h>
   73 #include <sys/sbuf.h>
   74 #include <sys/sched.h>
   75 #include <sys/sdt.h>
   76 #include <sys/signalvar.h>
   77 #include <sys/sleepqueue.h>
   78 #include <sys/stack.h>
   79 #include <sys/sysctl.h>
   80 #include <sys/time.h>
   81 
   82 #include <machine/atomic.h>
   83 
   84 #include <vm/uma.h>
   85 
   86 #ifdef DDB
   87 #include <ddb/ddb.h>
   88 #endif
   89 
   90 
   91 /*
   92  * Constants for the hash table of sleep queue chains.
   93  * SC_TABLESIZE must be a power of two for SC_MASK to work properly.
   94  */
   95 #define SC_TABLESIZE    256                     /* Must be power of 2. */
   96 #define SC_MASK         (SC_TABLESIZE - 1)
   97 #define SC_SHIFT        8
   98 #define SC_HASH(wc)     ((((uintptr_t)(wc) >> SC_SHIFT) ^ (uintptr_t)(wc)) & \
   99                             SC_MASK)
  100 #define SC_LOOKUP(wc)   &sleepq_chains[SC_HASH(wc)]
  101 #define NR_SLEEPQS      2
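
/*
 * Editor's note: the standalone sketch below is illustrative and is not
 * part of this file.  It shows how the SC_HASH() computation above folds
 * a wait channel pointer into a chain index: XORing in the address bits
 * above SC_SHIFT mixes information that SC_MASK alone would discard.
 * The EX_* names are local copies of the macros above.
 */
#if 0	/* illustration only, not compiled */
#include <stdint.h>
#include <stdio.h>

#define	EX_TABLESIZE	256
#define	EX_MASK		(EX_TABLESIZE - 1)
#define	EX_SHIFT	8
#define	EX_HASH(wc)	((((uintptr_t)(wc) >> EX_SHIFT) ^ (uintptr_t)(wc)) & \
			    EX_MASK)

int
main(void)
{
	int objects[4];
	int i;

	/* Distinct (even nearby) addresses map to chain indexes 0..255. */
	for (i = 0; i < 4; i++)
		printf("%p -> chain %ju\n", (void *)&objects[i],
		    (uintmax_t)EX_HASH(&objects[i]));
	return (0);
}
#endif
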
  102 /*
  103  * There are two different lists of sleep queues.  Both lists are connected
  104  * via the sq_hash entries.  The first list is the sleep queue chain list
  105  * that a sleep queue is on when it is attached to a wait channel.  The
  106  * second list is the free list hung off of a sleep queue that is attached
  107  * to a wait channel.
  108  *
  109  * Each sleep queue also contains the wait channel it is attached to, the
  110  * list of threads blocked on that wait channel, flags specific to the
  111  * wait channel, and the lock used to synchronize with a wait channel.
  112  * The flags are used to catch mismatches between the various consumers
  113  * of the sleep queue API (e.g. sleep/wakeup and condition variables).
  114  * The lock pointer is only used when invariants are enabled for various
  115  * debugging checks.
  116  *
  117  * Locking key:
  118  *  c - sleep queue chain lock
  119  */
  120 struct sleepqueue {
  121         struct threadqueue sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */
  122         u_int sq_blockedcnt[NR_SLEEPQS];        /* (c) N. of blocked threads. */
  123         LIST_ENTRY(sleepqueue) sq_hash;         /* (c) Chain and free list. */
  124         LIST_HEAD(, sleepqueue) sq_free;        /* (c) Free queues. */
  125         void    *sq_wchan;                      /* (c) Wait channel. */
  126         int     sq_type;                        /* (c) Queue type. */
  127 #ifdef INVARIANTS
  128         struct lock_object *sq_lock;            /* (c) Associated lock. */
  129 #endif
  130 };
  131 
  132 struct sleepqueue_chain {
  133         LIST_HEAD(, sleepqueue) sc_queues;      /* List of sleep queues. */
  134         struct mtx sc_lock;                     /* Spin lock for this chain. */
  135 #ifdef SLEEPQUEUE_PROFILING
  136         u_int   sc_depth;                       /* Length of sc_queues. */
  137         u_int   sc_max_depth;                   /* Max length of sc_queues. */
  138 #endif
  139 };
  140 
  141 #ifdef SLEEPQUEUE_PROFILING
  142 u_int sleepq_max_depth;
  143 static SYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD, 0, "sleepq profiling");
  144 static SYSCTL_NODE(_debug_sleepq, OID_AUTO, chains, CTLFLAG_RD, 0,
  145     "sleepq chain stats");
  146 SYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth,
   147     0, "maximum depth achieved by a single chain");
  148 
  149 static void     sleepq_profile(const char *wmesg);
  150 static int      prof_enabled;
  151 #endif
  152 static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
  153 static uma_zone_t sleepq_zone;
  154 
  155 /*
  156  * Prototypes for non-exported routines.
  157  */
  158 static int      sleepq_catch_signals(void *wchan, int pri);
  159 static int      sleepq_check_signals(void);
  160 static int      sleepq_check_timeout(void);
  161 #ifdef INVARIANTS
  162 static void     sleepq_dtor(void *mem, int size, void *arg);
  163 #endif
  164 static int      sleepq_init(void *mem, int size, int flags);
  165 static int      sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
  166                     int pri);
  167 static void     sleepq_switch(void *wchan, int pri);
  168 static void     sleepq_timeout(void *arg);
  169 
  170 SDT_PROBE_DECLARE(sched, , , sleep);
  171 SDT_PROBE_DECLARE(sched, , , wakeup);
  172 
  173 /*
  174  * Initialize SLEEPQUEUE_PROFILING specific sysctl nodes.
  175  * Note that it must happen after sleepinit() has been fully executed, so
  176  * it must happen after SI_SUB_KMEM SYSINIT() subsystem setup.
  177  */
  178 #ifdef SLEEPQUEUE_PROFILING
  179 static void
  180 init_sleepqueue_profiling(void)
  181 {
  182         char chain_name[10];
  183         struct sysctl_oid *chain_oid;
  184         u_int i;
  185 
  186         for (i = 0; i < SC_TABLESIZE; i++) {
  187                 snprintf(chain_name, sizeof(chain_name), "%u", i);
  188                 chain_oid = SYSCTL_ADD_NODE(NULL,
  189                     SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO,
  190                     chain_name, CTLFLAG_RD, NULL, "sleepq chain stats");
  191                 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  192                     "depth", CTLFLAG_RD, &sleepq_chains[i].sc_depth, 0, NULL);
  193                 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  194                     "max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0,
  195                     NULL);
  196         }
  197 }
  198 
  199 SYSINIT(sleepqueue_profiling, SI_SUB_LOCK, SI_ORDER_ANY,
  200     init_sleepqueue_profiling, NULL);
  201 #endif
  202 
  203 /*
  204  * Early initialization of sleep queues that is called from the sleepinit()
  205  * SYSINIT.
  206  */
  207 void
  208 init_sleepqueues(void)
  209 {
  210         int i;
  211 
  212         for (i = 0; i < SC_TABLESIZE; i++) {
  213                 LIST_INIT(&sleepq_chains[i].sc_queues);
  214                 mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
  215                     MTX_SPIN | MTX_RECURSE);
  216         }
  217         sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
  218 #ifdef INVARIANTS
  219             NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
  220 #else
  221             NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
  222 #endif
  223 
  224         thread0.td_sleepqueue = sleepq_alloc();
  225 }
  226 
  227 /*
  228  * Get a sleep queue for a new thread.
  229  */
  230 struct sleepqueue *
  231 sleepq_alloc(void)
  232 {
  233 
  234         return (uma_zalloc(sleepq_zone, M_WAITOK));
  235 }
  236 
  237 /*
  238  * Free a sleep queue when a thread is destroyed.
  239  */
  240 void
  241 sleepq_free(struct sleepqueue *sq)
  242 {
  243 
  244         uma_zfree(sleepq_zone, sq);
  245 }
  246 
  247 /*
  248  * Lock the sleep queue chain associated with the specified wait channel.
  249  */
  250 void
  251 sleepq_lock(void *wchan)
  252 {
  253         struct sleepqueue_chain *sc;
  254 
  255         sc = SC_LOOKUP(wchan);
  256         mtx_lock_spin(&sc->sc_lock);
  257 }
  258 
  259 /*
   260  * Look up the sleep queue associated with a given wait channel in the hash
   261  * table.  The associated sleep queue chain must already be locked by the
   262  * caller.  If no queue is found in the table, NULL is returned.
  263  */
  264 struct sleepqueue *
  265 sleepq_lookup(void *wchan)
  266 {
  267         struct sleepqueue_chain *sc;
  268         struct sleepqueue *sq;
  269 
  270         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  271         sc = SC_LOOKUP(wchan);
  272         mtx_assert(&sc->sc_lock, MA_OWNED);
  273         LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
  274                 if (sq->sq_wchan == wchan)
  275                         return (sq);
  276         return (NULL);
  277 }
  278 
  279 /*
  280  * Unlock the sleep queue chain associated with a given wait channel.
  281  */
  282 void
  283 sleepq_release(void *wchan)
  284 {
  285         struct sleepqueue_chain *sc;
  286 
  287         sc = SC_LOOKUP(wchan);
  288         mtx_unlock_spin(&sc->sc_lock);
  289 }
  290 
  291 /*
  292  * Places the current thread on the sleep queue for the specified wait
  293  * channel.  If INVARIANTS is enabled, then it associates the passed in
  294  * lock with the sleepq to make sure it is held when that sleep queue is
  295  * woken up.
  296  */
  297 void
  298 sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
  299     int queue)
  300 {
  301         struct sleepqueue_chain *sc;
  302         struct sleepqueue *sq;
  303         struct thread *td;
  304 
  305         td = curthread;
  306         sc = SC_LOOKUP(wchan);
  307         mtx_assert(&sc->sc_lock, MA_OWNED);
  308         MPASS(td->td_sleepqueue != NULL);
  309         MPASS(wchan != NULL);
  310         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  311 
  312         /* If this thread is not allowed to sleep, die a horrible death. */
  313         KASSERT(td->td_no_sleeping == 0,
  314             ("%s: td %p to sleep on wchan %p with sleeping prohibited",
  315             __func__, td, wchan));
  316 
  317         /* Look up the sleep queue associated with the wait channel 'wchan'. */
  318         sq = sleepq_lookup(wchan);
  319 
  320         /*
  321          * If the wait channel does not already have a sleep queue, use
  322          * this thread's sleep queue.  Otherwise, insert the current thread
  323          * into the sleep queue already in use by this wait channel.
  324          */
  325         if (sq == NULL) {
  326 #ifdef INVARIANTS
  327                 int i;
  328 
  329                 sq = td->td_sleepqueue;
  330                 for (i = 0; i < NR_SLEEPQS; i++) {
  331                         KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
  332                             ("thread's sleep queue %d is not empty", i));
  333                         KASSERT(sq->sq_blockedcnt[i] == 0,
  334                             ("thread's sleep queue %d count mismatches", i));
  335                 }
  336                 KASSERT(LIST_EMPTY(&sq->sq_free),
  337                     ("thread's sleep queue has a non-empty free list"));
  338                 KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
  339                 sq->sq_lock = lock;
  340 #endif
  341 #ifdef SLEEPQUEUE_PROFILING
  342                 sc->sc_depth++;
  343                 if (sc->sc_depth > sc->sc_max_depth) {
  344                         sc->sc_max_depth = sc->sc_depth;
  345                         if (sc->sc_max_depth > sleepq_max_depth)
  346                                 sleepq_max_depth = sc->sc_max_depth;
  347                 }
  348 #endif
  349                 sq = td->td_sleepqueue;
  350                 LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
  351                 sq->sq_wchan = wchan;
  352                 sq->sq_type = flags & SLEEPQ_TYPE;
  353         } else {
  354                 MPASS(wchan == sq->sq_wchan);
  355                 MPASS(lock == sq->sq_lock);
  356                 MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
  357                 LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
  358         }
  359         thread_lock(td);
  360         TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
  361         sq->sq_blockedcnt[queue]++;
  362         td->td_sleepqueue = NULL;
  363         td->td_sqqueue = queue;
  364         td->td_wchan = wchan;
  365         td->td_wmesg = wmesg;
  366         if (flags & SLEEPQ_INTERRUPTIBLE) {
  367                 td->td_flags |= TDF_SINTR;
  368                 td->td_flags &= ~TDF_SLEEPABORT;
  369         }
  370         thread_unlock(td);
  371 }
  372 
  373 /*
  374  * Sets a timeout that will remove the current thread from the specified
  375  * sleep queue after timo ticks if the thread has not already been awakened.
  376  */
  377 void
  378 sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
  379     int flags)
  380 {
  381         struct sleepqueue_chain *sc;
  382         struct thread *td;
  383         sbintime_t pr1;
  384 
  385         td = curthread;
  386         sc = SC_LOOKUP(wchan);
  387         mtx_assert(&sc->sc_lock, MA_OWNED);
  388         MPASS(TD_ON_SLEEPQ(td));
  389         MPASS(td->td_sleepqueue == NULL);
  390         MPASS(wchan != NULL);
  391         if (cold && td == &thread0)
  392                 panic("timed sleep before timers are working");
  393         KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx",
  394             td->td_tid, td, (uintmax_t)td->td_sleeptimo));
  395         thread_lock(td);
  396         callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1);
  397         thread_unlock(td);
  398         callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1,
  399             sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC |
  400             C_DIRECT_EXEC);
  401 }
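
/*
 * Editor's note: the sketch below is illustrative and is not part of this
 * file.  A timed sleep arms the per-thread callout with
 * sleepq_set_timeout_sbt() after sleepq_add() and before blocking;
 * sleepq_timedwait() then returns EWOULDBLOCK if the timeout fired first
 * and 0 otherwise.  The function name and wait channel are hypothetical,
 * and interlock handling is elided.
 */
#if 0	/* illustration only, not compiled */
static int
example_timed_block(void *wchan, sbintime_t sbt)
{

	sleepq_lock(wchan);
	sleepq_add(wchan, NULL, "examp", SLEEPQ_SLEEP, 0);
	sleepq_set_timeout_sbt(wchan, sbt, 0, 0);	/* relative timeout */
	return (sleepq_timedwait(wchan, 0));
}
#endif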
  402 
  403 /*
  404  * Return the number of actual sleepers for the specified queue.
  405  */
  406 u_int
  407 sleepq_sleepcnt(void *wchan, int queue)
  408 {
  409         struct sleepqueue *sq;
  410 
  411         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  412         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  413         sq = sleepq_lookup(wchan);
  414         if (sq == NULL)
  415                 return (0);
  416         return (sq->sq_blockedcnt[queue]);
  417 }
  418 
  419 /*
  420  * Marks the pending sleep of the current thread as interruptible and
  421  * makes an initial check for pending signals before putting a thread
  422  * to sleep. Enters and exits with the thread lock held.  Thread lock
  423  * may have transitioned from the sleepq lock to a run lock.
  424  */
  425 static int
  426 sleepq_catch_signals(void *wchan, int pri)
  427 {
  428         struct sleepqueue_chain *sc;
  429         struct sleepqueue *sq;
  430         struct thread *td;
  431         struct proc *p;
  432         struct sigacts *ps;
  433         int sig, ret;
  434 
  435         ret = 0;
  436         td = curthread;
  437         p = curproc;
  438         sc = SC_LOOKUP(wchan);
  439         mtx_assert(&sc->sc_lock, MA_OWNED);
  440         MPASS(wchan != NULL);
  441         if ((td->td_pflags & TDP_WAKEUP) != 0) {
  442                 td->td_pflags &= ~TDP_WAKEUP;
  443                 ret = EINTR;
  444                 thread_lock(td);
  445                 goto out;
  446         }
  447 
  448         /*
  449          * See if there are any pending signals or suspension requests for this
  450          * thread.  If not, we can switch immediately.
  451          */
  452         thread_lock(td);
  453         if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) != 0) {
  454                 thread_unlock(td);
  455                 mtx_unlock_spin(&sc->sc_lock);
  456                 CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)",
  457                         (void *)td, (long)p->p_pid, td->td_name);
  458                 PROC_LOCK(p);
  459                 /*
  460                  * Check for suspension first. Checking for signals and then
  461                  * suspending could result in a missed signal, since a signal
  462                  * can be delivered while this thread is suspended.
  463                  */
  464                 if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) {
  465                         ret = thread_suspend_check(1);
  466                         MPASS(ret == 0 || ret == EINTR || ret == ERESTART);
  467                         if (ret != 0) {
  468                                 PROC_UNLOCK(p);
  469                                 mtx_lock_spin(&sc->sc_lock);
  470                                 thread_lock(td);
  471                                 goto out;
  472                         }
  473                 }
  474                 if ((td->td_flags & TDF_NEEDSIGCHK) != 0) {
  475                         ps = p->p_sigacts;
  476                         mtx_lock(&ps->ps_mtx);
  477                         sig = cursig(td);
  478                         if (sig == -1) {
  479                                 mtx_unlock(&ps->ps_mtx);
  480                                 KASSERT((td->td_flags & TDF_SBDRY) != 0,
  481                                     ("lost TDF_SBDRY"));
  482                                 KASSERT(TD_SBDRY_INTR(td),
   483                                     ("lost TDF_SERESTART or TDF_SEINTR"));
  484                                 KASSERT((td->td_flags &
  485                                     (TDF_SEINTR | TDF_SERESTART)) !=
  486                                     (TDF_SEINTR | TDF_SERESTART),
  487                                     ("both TDF_SEINTR and TDF_SERESTART"));
  488                                 ret = TD_SBDRY_ERRNO(td);
  489                         } else if (sig != 0) {
  490                                 ret = SIGISMEMBER(ps->ps_sigintr, sig) ?
  491                                     EINTR : ERESTART;
  492                                 mtx_unlock(&ps->ps_mtx);
  493                         } else {
  494                                 mtx_unlock(&ps->ps_mtx);
  495                         }
  496 
  497                         /*
   498                          * Do not go to sleep if this thread was the
  499                          * ptrace(2) attach leader.  cursig() consumed
  500                          * SIGSTOP from PT_ATTACH, but we usually act
  501                          * on the signal by interrupting sleep, and
  502                          * should do that here as well.
  503                          */
  504                         if ((td->td_dbgflags & TDB_FSTP) != 0) {
  505                                 if (ret == 0)
  506                                         ret = EINTR;
  507                                 td->td_dbgflags &= ~TDB_FSTP;
  508                         }
  509                 }
  510                 /*
  511                  * Lock the per-process spinlock prior to dropping the PROC_LOCK
  512                  * to avoid a signal delivery race.  PROC_LOCK, PROC_SLOCK, and
  513                  * thread_lock() are currently held in tdsendsignal().
  514                  */
  515                 PROC_SLOCK(p);
  516                 mtx_lock_spin(&sc->sc_lock);
  517                 PROC_UNLOCK(p);
  518                 thread_lock(td);
  519                 PROC_SUNLOCK(p);
  520         }
  521         if (ret == 0) {
  522                 sleepq_switch(wchan, pri);
  523                 return (0);
  524         }
  525 out:
  526         /*
  527          * There were pending signals and this thread is still
   528          * on the sleep queue, so remove it from the sleep queue.
  529          */
  530         if (TD_ON_SLEEPQ(td)) {
  531                 sq = sleepq_lookup(wchan);
  532                 if (sleepq_resume_thread(sq, td, 0)) {
  533 #ifdef INVARIANTS
  534                         /*
  535                          * This thread hasn't gone to sleep yet, so it
  536                          * should not be swapped out.
  537                          */
  538                         panic("not waking up swapper");
  539 #endif
  540                 }
  541         }
  542         mtx_unlock_spin(&sc->sc_lock);
  543         MPASS(td->td_lock != &sc->sc_lock);
  544         return (ret);
  545 }
  546 
  547 /*
  548  * Switches to another thread if we are still asleep on a sleep queue.
  549  * Returns with thread lock.
  550  */
  551 static void
  552 sleepq_switch(void *wchan, int pri)
  553 {
  554         struct sleepqueue_chain *sc;
  555         struct sleepqueue *sq;
  556         struct thread *td;
  557         bool rtc_changed;
  558 
  559         td = curthread;
  560         sc = SC_LOOKUP(wchan);
  561         mtx_assert(&sc->sc_lock, MA_OWNED);
  562         THREAD_LOCK_ASSERT(td, MA_OWNED);
  563 
  564         /*
  565          * If we have a sleep queue, then we've already been woken up, so
  566          * just return.
  567          */
  568         if (td->td_sleepqueue != NULL) {
  569                 mtx_unlock_spin(&sc->sc_lock);
  570                 return;
  571         }
  572 
  573         /*
  574          * If TDF_TIMEOUT is set, then our sleep has been timed out
  575          * already but we are still on the sleep queue, so dequeue the
  576          * thread and return.
  577          *
  578          * Do the same if the real-time clock has been adjusted since this
  579          * thread calculated its timeout based on that clock.  This handles
  580          * the following race:
  581          * - The Ts thread needs to sleep until an absolute real-clock time.
  582          *   It copies the global rtc_generation into curthread->td_rtcgen,
  583          *   reads the RTC, and calculates a sleep duration based on that time.
  584          *   See umtxq_sleep() for an example.
  585          * - The Tc thread adjusts the RTC, bumps rtc_generation, and wakes
  586          *   threads that are sleeping until an absolute real-clock time.
  587          *   See tc_setclock() and the POSIX specification of clock_settime().
  588          * - Ts reaches the code below.  It holds the sleepqueue chain lock,
  589          *   so Tc has finished waking, so this thread must test td_rtcgen.
  590          * (The declaration of td_rtcgen refers to this comment.)
  591          */
  592         rtc_changed = td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation;
  593         if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) {
  594                 if (rtc_changed) {
  595                         td->td_rtcgen = 0;
  596                 }
  597                 MPASS(TD_ON_SLEEPQ(td));
  598                 sq = sleepq_lookup(wchan);
  599                 if (sleepq_resume_thread(sq, td, 0)) {
  600 #ifdef INVARIANTS
  601                         /*
  602                          * This thread hasn't gone to sleep yet, so it
  603                          * should not be swapped out.
  604                          */
  605                         panic("not waking up swapper");
  606 #endif
  607                 }
  608                 mtx_unlock_spin(&sc->sc_lock);
  609                 return;
  610         }
  611 #ifdef SLEEPQUEUE_PROFILING
  612         if (prof_enabled)
  613                 sleepq_profile(td->td_wmesg);
  614 #endif
  615         MPASS(td->td_sleepqueue == NULL);
  616         sched_sleep(td, pri);
  617         thread_lock_set(td, &sc->sc_lock);
  618         SDT_PROBE0(sched, , , sleep);
  619         TD_SET_SLEEPING(td);
  620         mi_switch(SW_VOL | SWT_SLEEPQ, NULL);
  621         KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
  622         CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
  623             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
  624 }
  625 
  626 /*
  627  * Check to see if we timed out.
  628  */
  629 static int
  630 sleepq_check_timeout(void)
  631 {
  632         struct thread *td;
  633         int res;
  634 
  635         td = curthread;
  636         THREAD_LOCK_ASSERT(td, MA_OWNED);
  637 
  638         /*
  639          * If TDF_TIMEOUT is set, we timed out.  But recheck
  640          * td_sleeptimo anyway.
  641          */
  642         res = 0;
  643         if (td->td_sleeptimo != 0) {
  644                 if (td->td_sleeptimo <= sbinuptime())
  645                         res = EWOULDBLOCK;
  646                 td->td_sleeptimo = 0;
  647         }
  648         if (td->td_flags & TDF_TIMEOUT)
  649                 td->td_flags &= ~TDF_TIMEOUT;
  650         else
  651                 /*
  652                  * We ignore the situation where timeout subsystem was
  653                  * unable to stop our callout.  The struct thread is
  654                  * type-stable, the callout will use the correct
  655                  * memory when running.  The checks of the
  656                  * td_sleeptimo value in this function and in
  657                  * sleepq_timeout() ensure that the thread does not
  658                  * get spurious wakeups, even if the callout was reset
  659                  * or thread reused.
  660                  */
  661                 callout_stop(&td->td_slpcallout);
  662         return (res);
  663 }
  664 
  665 /*
  666  * Check to see if we were awoken by a signal.
  667  */
  668 static int
  669 sleepq_check_signals(void)
  670 {
  671         struct thread *td;
  672 
  673         td = curthread;
  674         THREAD_LOCK_ASSERT(td, MA_OWNED);
  675 
  676         /* We are no longer in an interruptible sleep. */
  677         if (td->td_flags & TDF_SINTR)
  678                 td->td_flags &= ~TDF_SINTR;
  679 
  680         if (td->td_flags & TDF_SLEEPABORT) {
  681                 td->td_flags &= ~TDF_SLEEPABORT;
  682                 return (td->td_intrval);
  683         }
  684 
  685         return (0);
  686 }
  687 
  688 /*
  689  * Block the current thread until it is awakened from its sleep queue.
  690  */
  691 void
  692 sleepq_wait(void *wchan, int pri)
  693 {
  694         struct thread *td;
  695 
  696         td = curthread;
  697         MPASS(!(td->td_flags & TDF_SINTR));
  698         thread_lock(td);
  699         sleepq_switch(wchan, pri);
  700         thread_unlock(td);
  701 }
  702 
  703 /*
  704  * Block the current thread until it is awakened from its sleep queue
  705  * or it is interrupted by a signal.
  706  */
  707 int
  708 sleepq_wait_sig(void *wchan, int pri)
  709 {
  710         int rcatch;
  711         int rval;
  712 
  713         rcatch = sleepq_catch_signals(wchan, pri);
  714         rval = sleepq_check_signals();
  715         thread_unlock(curthread);
  716         if (rcatch)
  717                 return (rcatch);
  718         return (rval);
  719 }
  720 
  721 /*
  722  * Block the current thread until it is awakened from its sleep queue
  723  * or it times out while waiting.
  724  */
  725 int
  726 sleepq_timedwait(void *wchan, int pri)
  727 {
  728         struct thread *td;
  729         int rval;
  730 
  731         td = curthread;
  732         MPASS(!(td->td_flags & TDF_SINTR));
  733         thread_lock(td);
  734         sleepq_switch(wchan, pri);
  735         rval = sleepq_check_timeout();
  736         thread_unlock(td);
  737 
  738         return (rval);
  739 }
  740 
  741 /*
  742  * Block the current thread until it is awakened from its sleep queue,
  743  * it is interrupted by a signal, or it times out waiting to be awakened.
  744  */
  745 int
  746 sleepq_timedwait_sig(void *wchan, int pri)
  747 {
  748         int rcatch, rvalt, rvals;
  749 
  750         rcatch = sleepq_catch_signals(wchan, pri);
  751         rvalt = sleepq_check_timeout();
  752         rvals = sleepq_check_signals();
  753         thread_unlock(curthread);
  754         if (rcatch)
  755                 return (rcatch);
  756         if (rvals)
  757                 return (rvals);
  758         return (rvalt);
  759 }
  760 
  761 /*
   762  * Returns the type of sleep queue given a wait channel.
  763  */
  764 int
  765 sleepq_type(void *wchan)
  766 {
  767         struct sleepqueue *sq;
  768         int type;
  769 
  770         MPASS(wchan != NULL);
  771 
  772         sleepq_lock(wchan);
  773         sq = sleepq_lookup(wchan);
  774         if (sq == NULL) {
  775                 sleepq_release(wchan);
  776                 return (-1);
  777         }
  778         type = sq->sq_type;
  779         sleepq_release(wchan);
  780         return (type);
  781 }
  782 
  783 /*
  784  * Removes a thread from a sleep queue and makes it
  785  * runnable.
  786  */
  787 static int
  788 sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri)
  789 {
  790         struct sleepqueue_chain *sc;
  791 
  792         MPASS(td != NULL);
  793         MPASS(sq->sq_wchan != NULL);
  794         MPASS(td->td_wchan == sq->sq_wchan);
  795         MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0);
  796         THREAD_LOCK_ASSERT(td, MA_OWNED);
  797         sc = SC_LOOKUP(sq->sq_wchan);
  798         mtx_assert(&sc->sc_lock, MA_OWNED);
  799 
  800         SDT_PROBE2(sched, , , wakeup, td, td->td_proc);
  801 
  802         /* Remove the thread from the queue. */
  803         sq->sq_blockedcnt[td->td_sqqueue]--;
  804         TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
  805 
  806         /*
  807          * Get a sleep queue for this thread.  If this is the last waiter,
   808          * use the queue itself and take it out of the chain; otherwise,
  809          * remove a queue from the free list.
  810          */
  811         if (LIST_EMPTY(&sq->sq_free)) {
  812                 td->td_sleepqueue = sq;
  813 #ifdef INVARIANTS
  814                 sq->sq_wchan = NULL;
  815 #endif
  816 #ifdef SLEEPQUEUE_PROFILING
  817                 sc->sc_depth--;
  818 #endif
  819         } else
  820                 td->td_sleepqueue = LIST_FIRST(&sq->sq_free);
  821         LIST_REMOVE(td->td_sleepqueue, sq_hash);
  822 
  823         td->td_wmesg = NULL;
  824         td->td_wchan = NULL;
  825         td->td_flags &= ~TDF_SINTR;
  826 
  827         CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)",
  828             (void *)td, (long)td->td_proc->p_pid, td->td_name);
  829 
  830         /* Adjust priority if requested. */
  831         MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
  832         if (pri != 0 && td->td_priority > pri &&
  833             PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
  834                 sched_prio(td, pri);
  835 
  836         /*
  837          * Note that thread td might not be sleeping if it is running
  838          * sleepq_catch_signals() on another CPU or is blocked on its
  839          * proc lock to check signals.  There's no need to mark the
  840          * thread runnable in that case.
  841          */
  842         if (TD_IS_SLEEPING(td)) {
  843                 TD_CLR_SLEEPING(td);
  844                 return (setrunnable(td));
  845         }
  846         return (0);
  847 }
  848 
  849 #ifdef INVARIANTS
  850 /*
  851  * UMA zone item deallocator.
  852  */
  853 static void
  854 sleepq_dtor(void *mem, int size, void *arg)
  855 {
  856         struct sleepqueue *sq;
  857         int i;
  858 
  859         sq = mem;
  860         for (i = 0; i < NR_SLEEPQS; i++) {
  861                 MPASS(TAILQ_EMPTY(&sq->sq_blocked[i]));
  862                 MPASS(sq->sq_blockedcnt[i] == 0);
  863         }
  864 }
  865 #endif
  866 
  867 /*
  868  * UMA zone item initializer.
  869  */
  870 static int
  871 sleepq_init(void *mem, int size, int flags)
  872 {
  873         struct sleepqueue *sq;
  874         int i;
  875 
  876         bzero(mem, size);
  877         sq = mem;
  878         for (i = 0; i < NR_SLEEPQS; i++) {
  879                 TAILQ_INIT(&sq->sq_blocked[i]);
  880                 sq->sq_blockedcnt[i] = 0;
  881         }
  882         LIST_INIT(&sq->sq_free);
  883         return (0);
  884 }
  885 
  886 /*
  887  * Find thread sleeping on a wait channel and resume it.
  888  */
  889 int
  890 sleepq_signal(void *wchan, int flags, int pri, int queue)
  891 {
  892         struct sleepqueue_chain *sc;
  893         struct sleepqueue *sq;
  894         struct threadqueue *head;
  895         struct thread *td, *besttd;
  896         int wakeup_swapper;
  897 
  898         CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
  899         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  900         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  901         sq = sleepq_lookup(wchan);
  902         if (sq == NULL)
  903                 return (0);
  904         KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
  905             ("%s: mismatch between sleep/wakeup and cv_*", __func__));
  906 
  907         head = &sq->sq_blocked[queue];
  908         if (flags & SLEEPQ_UNFAIR) {
  909                 /*
  910                  * Find the most recently sleeping thread, but try to
   911          * skip threads still in the process of a context switch to
  912                  * avoid spinning on the thread lock.
  913                  */
  914                 sc = SC_LOOKUP(wchan);
  915                 besttd = TAILQ_LAST_FAST(head, thread, td_slpq);
  916                 while (besttd->td_lock != &sc->sc_lock) {
  917                         td = TAILQ_PREV_FAST(besttd, head, thread, td_slpq);
  918                         if (td == NULL)
  919                                 break;
  920                         besttd = td;
  921                 }
  922         } else {
  923                 /*
  924                  * Find the highest priority thread on the queue.  If there
  925                  * is a tie, use the thread that first appears in the queue
  926                  * as it has been sleeping the longest since threads are
  927                  * always added to the tail of sleep queues.
  928                  */
  929                 besttd = td = TAILQ_FIRST(head);
  930                 while ((td = TAILQ_NEXT(td, td_slpq)) != NULL) {
  931                         if (td->td_priority < besttd->td_priority)
  932                                 besttd = td;
  933                 }
  934         }
  935         MPASS(besttd != NULL);
  936         thread_lock(besttd);
  937         wakeup_swapper = sleepq_resume_thread(sq, besttd, pri);
  938         thread_unlock(besttd);
  939         return (wakeup_swapper);
  940 }
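
/*
 * Editor's note: the sketch below is illustrative and is not part of this
 * file.  It shows the waker-side pattern, roughly as wakeup_one(9) uses
 * this API: the caller takes the chain lock, wakes a thread, drops the
 * lock, and kicks the swapper itself if sleepq_signal() reports that a
 * swapped-out thread was made runnable (compare sleepq_remove() and
 * sleepq_timeout() below).  The function name is hypothetical.
 */
#if 0	/* illustration only, not compiled */
static void
example_wakeup_one(void *wchan)
{
	int wakeup_swapper;

	sleepq_lock(wchan);
	wakeup_swapper = sleepq_signal(wchan, SLEEPQ_SLEEP, 0, 0);
	sleepq_release(wchan);
	if (wakeup_swapper)
		kick_proc0();
}
#endif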
  941 
  942 static bool
  943 match_any(struct thread *td __unused)
  944 {
  945 
  946         return (true);
  947 }
  948 
  949 /*
  950  * Resume all threads sleeping on a specified wait channel.
  951  */
  952 int
  953 sleepq_broadcast(void *wchan, int flags, int pri, int queue)
  954 {
  955         struct sleepqueue *sq;
  956 
  957         CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
  958         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  959         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  960         sq = sleepq_lookup(wchan);
  961         if (sq == NULL)
  962                 return (0);
  963         KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
  964             ("%s: mismatch between sleep/wakeup and cv_*", __func__));
  965 
  966         return (sleepq_remove_matching(sq, queue, match_any, pri));
  967 }
  968 
  969 /*
  970  * Resume threads on the sleep queue that match the given predicate.
  971  */
  972 int
  973 sleepq_remove_matching(struct sleepqueue *sq, int queue,
  974     bool (*matches)(struct thread *), int pri)
  975 {
  976         struct thread *td, *tdn;
  977         int wakeup_swapper;
  978 
  979         /*
  980          * The last thread will be given ownership of sq and may
  981          * re-enqueue itself before sleepq_resume_thread() returns,
  982          * so we must cache the "next" queue item at the beginning
  983          * of the final iteration.
  984          */
  985         wakeup_swapper = 0;
  986         TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
  987                 thread_lock(td);
  988                 if (matches(td))
  989                         wakeup_swapper |= sleepq_resume_thread(sq, td, pri);
  990                 thread_unlock(td);
  991         }
  992 
  993         return (wakeup_swapper);
  994 }
  995 
  996 /*
  997  * Time sleeping threads out.  When the timeout expires, the thread is
  998  * removed from the sleep queue and made runnable if it is still asleep.
  999  */
 1000 static void
 1001 sleepq_timeout(void *arg)
 1002 {
 1003         struct sleepqueue_chain *sc;
 1004         struct sleepqueue *sq;
 1005         struct thread *td;
 1006         void *wchan;
 1007         int wakeup_swapper;
 1008 
 1009         td = arg;
 1010         wakeup_swapper = 0;
 1011         CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
 1012             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
 1013 
 1014         thread_lock(td);
 1015 
 1016         if (td->td_sleeptimo > sbinuptime() || td->td_sleeptimo == 0) {
 1017                 /*
 1018                  * The thread does not want a timeout (yet).
 1019                  */
 1020         } else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
 1021                 /*
 1022                  * See if the thread is asleep and get the wait
 1023                  * channel if it is.
 1024                  */
 1025                 wchan = td->td_wchan;
 1026                 sc = SC_LOOKUP(wchan);
 1027                 THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
 1028                 sq = sleepq_lookup(wchan);
 1029                 MPASS(sq != NULL);
 1030                 td->td_flags |= TDF_TIMEOUT;
 1031                 wakeup_swapper = sleepq_resume_thread(sq, td, 0);
 1032         } else if (TD_ON_SLEEPQ(td)) {
 1033                 /*
 1034                  * If the thread is on the SLEEPQ but isn't sleeping
 1035                  * yet, it can either be on another CPU in between
 1036                  * sleepq_add() and one of the sleepq_*wait*()
 1037                  * routines or it can be in sleepq_catch_signals().
 1038                  */
 1039                 td->td_flags |= TDF_TIMEOUT;
 1040         }
 1041 
 1042         thread_unlock(td);
 1043         if (wakeup_swapper)
 1044                 kick_proc0();
 1045 }
 1046 
 1047 /*
 1048  * Resumes a specific thread from the sleep queue associated with a specific
 1049  * wait channel if it is on that queue.
 1050  */
 1051 void
 1052 sleepq_remove(struct thread *td, void *wchan)
 1053 {
 1054         struct sleepqueue *sq;
 1055         int wakeup_swapper;
 1056 
 1057         /*
 1058          * Look up the sleep queue for this wait channel, then re-check
  1059          * that the thread is asleep on that channel; if it is not, then
 1060          * bail.
 1061          */
 1062         MPASS(wchan != NULL);
 1063         sleepq_lock(wchan);
 1064         sq = sleepq_lookup(wchan);
 1065         /*
 1066          * We can not lock the thread here as it may be sleeping on a
 1067          * different sleepq.  However, holding the sleepq lock for this
 1068          * wchan can guarantee that we do not miss a wakeup for this
 1069          * channel.  The asserts below will catch any false positives.
 1070          */
 1071         if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) {
 1072                 sleepq_release(wchan);
 1073                 return;
 1074         }
 1075         /* Thread is asleep on sleep queue sq, so wake it up. */
 1076         thread_lock(td);
 1077         MPASS(sq != NULL);
 1078         MPASS(td->td_wchan == wchan);
 1079         wakeup_swapper = sleepq_resume_thread(sq, td, 0);
 1080         thread_unlock(td);
 1081         sleepq_release(wchan);
 1082         if (wakeup_swapper)
 1083                 kick_proc0();
 1084 }
 1085 
 1086 /*
 1087  * Abort a thread as if an interrupt had occurred.  Only abort
 1088  * interruptible waits (unfortunately it isn't safe to abort others).
 1089  */
 1090 int
 1091 sleepq_abort(struct thread *td, int intrval)
 1092 {
 1093         struct sleepqueue *sq;
 1094         void *wchan;
 1095 
 1096         THREAD_LOCK_ASSERT(td, MA_OWNED);
 1097         MPASS(TD_ON_SLEEPQ(td));
 1098         MPASS(td->td_flags & TDF_SINTR);
 1099         MPASS(intrval == EINTR || intrval == ERESTART);
 1100 
 1101         /*
 1102          * If the TDF_TIMEOUT flag is set, just leave. A
 1103          * timeout is scheduled anyhow.
 1104          */
 1105         if (td->td_flags & TDF_TIMEOUT)
 1106                 return (0);
 1107 
 1108         CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)",
 1109             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
 1110         td->td_intrval = intrval;
 1111         td->td_flags |= TDF_SLEEPABORT;
 1112         /*
 1113          * If the thread has not slept yet it will find the signal in
 1114          * sleepq_catch_signals() and call sleepq_resume_thread.  Otherwise
 1115          * we have to do it here.
 1116          */
 1117         if (!TD_IS_SLEEPING(td))
 1118                 return (0);
 1119         wchan = td->td_wchan;
 1120         MPASS(wchan != NULL);
 1121         sq = sleepq_lookup(wchan);
 1122         MPASS(sq != NULL);
 1123 
 1124         /* Thread is asleep on sleep queue sq, so wake it up. */
 1125         return (sleepq_resume_thread(sq, td, 0));
 1126 }
 1127 
 1128 void
 1129 sleepq_chains_remove_matching(bool (*matches)(struct thread *))
 1130 {
 1131         struct sleepqueue_chain *sc;
 1132         struct sleepqueue *sq, *sq1;
 1133         int i, wakeup_swapper;
 1134 
 1135         wakeup_swapper = 0;
 1136         for (sc = &sleepq_chains[0]; sc < sleepq_chains + SC_TABLESIZE; ++sc) {
 1137                 if (LIST_EMPTY(&sc->sc_queues)) {
 1138                         continue;
 1139                 }
 1140                 mtx_lock_spin(&sc->sc_lock);
 1141                 LIST_FOREACH_SAFE(sq, &sc->sc_queues, sq_hash, sq1) {
 1142                         for (i = 0; i < NR_SLEEPQS; ++i) {
 1143                                 wakeup_swapper |= sleepq_remove_matching(sq, i,
 1144                                     matches, 0);
 1145                         }
 1146                 }
 1147                 mtx_unlock_spin(&sc->sc_lock);
 1148         }
 1149         if (wakeup_swapper) {
 1150                 kick_proc0();
 1151         }
 1152 }
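
/*
 * Editor's note: the sketch below is illustrative and is not part of this
 * file.  sleepq_remove_matching() and sleepq_chains_remove_matching()
 * take a predicate on the blocked thread; match_any() above is the
 * degenerate case used by sleepq_broadcast().  The hypothetical predicate
 * below would resume only the threads of one process, with the target
 * communicated through a static variable since the callback takes no
 * opaque argument.
 */
#if 0	/* illustration only, not compiled */
static struct proc *example_target;

static bool
example_match_proc(struct thread *td)
{

	return (td->td_proc == example_target);
}
#endif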
 1153 
 1154 /*
 1155  * Prints the stacks of all threads presently sleeping on wchan/queue to
 1156  * the sbuf sb.  Sets count_stacks_printed to the number of stacks actually
 1157  * printed.  Typically, this will equal the number of threads sleeping on the
 1158  * queue, but may be less if sb overflowed before all stacks were printed.
 1159  */
 1160 #ifdef STACK
 1161 int
 1162 sleepq_sbuf_print_stacks(struct sbuf *sb, void *wchan, int queue,
 1163     int *count_stacks_printed)
 1164 {
 1165         struct thread *td, *td_next;
 1166         struct sleepqueue *sq;
 1167         struct stack **st;
 1168         struct sbuf **td_infos;
 1169         int i, stack_idx, error, stacks_to_allocate;
 1170         bool finished, partial_print;
 1171 
 1172         error = 0;
 1173         finished = false;
 1174         partial_print = false;
 1175 
 1176         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
 1177         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
 1178 
 1179         stacks_to_allocate = 10;
 1180         for (i = 0; i < 3 && !finished ; i++) {
 1181                 /* We cannot malloc while holding the queue's spinlock, so
 1182                  * we do our mallocs now, and hope it is enough.  If it
 1183                  * isn't, we will free these, drop the lock, malloc more,
 1184                  * and try again, up to a point.  After that point we will
 1185                  * give up and report ENOMEM. We also cannot write to sb
 1186                  * during this time since the client may have set the
 1187                  * SBUF_AUTOEXTEND flag on their sbuf, which could cause a
 1188                  * malloc as we print to it.  So we defer actually printing
 1189                  * to sb until after we drop the spinlock.
 1190                  */
 1191 
 1192                 /* Where we will store the stacks. */
 1193                 st = malloc(sizeof(struct stack *) * stacks_to_allocate,
 1194                     M_TEMP, M_WAITOK);
 1195                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1196                     stack_idx++)
 1197                         st[stack_idx] = stack_create();
 1198 
 1199                 /* Where we will store the td name, tid, etc. */
 1200                 td_infos = malloc(sizeof(struct sbuf *) * stacks_to_allocate,
 1201                     M_TEMP, M_WAITOK);
 1202                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1203                     stack_idx++)
 1204                         td_infos[stack_idx] = sbuf_new(NULL, NULL,
 1205                             MAXCOMLEN + sizeof(struct thread *) * 2 + 40,
 1206                             SBUF_FIXEDLEN);
 1207 
 1208                 sleepq_lock(wchan);
 1209                 sq = sleepq_lookup(wchan);
 1210                 if (sq == NULL) {
 1211                         /* This sleepq does not exist; exit and return ENOENT. */
 1212                         error = ENOENT;
 1213                         finished = true;
 1214                         sleepq_release(wchan);
 1215                         goto loop_end;
 1216                 }
 1217 
 1218                 stack_idx = 0;
 1219                 /* Save thread info */
 1220                 TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq,
 1221                     td_next) {
 1222                         if (stack_idx >= stacks_to_allocate)
 1223                                 goto loop_end;
 1224 
 1225                         /* Note the td_lock is equal to the sleepq_lock here. */
 1226                         stack_save_td(st[stack_idx], td);
 1227 
 1228                         sbuf_printf(td_infos[stack_idx], "%d: %s %p",
 1229                             td->td_tid, td->td_name, td);
 1230 
 1231                         ++stack_idx;
 1232                 }
 1233 
 1234                 finished = true;
 1235                 sleepq_release(wchan);
 1236 
 1237                 /* Print the stacks */
 1238                 for (i = 0; i < stack_idx; i++) {
 1239                         sbuf_finish(td_infos[i]);
 1240                         sbuf_printf(sb, "--- thread %s: ---\n", sbuf_data(td_infos[i]));
 1241                         stack_sbuf_print(sb, st[i]);
 1242                         sbuf_printf(sb, "\n");
 1243 
 1244                         error = sbuf_error(sb);
 1245                         if (error == 0)
 1246                                 *count_stacks_printed = stack_idx;
 1247                 }
 1248 
 1249 loop_end:
 1250                 if (!finished)
 1251                         sleepq_release(wchan);
 1252                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1253                     stack_idx++)
 1254                         stack_destroy(st[stack_idx]);
 1255                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1256                     stack_idx++)
 1257                         sbuf_delete(td_infos[stack_idx]);
 1258                 free(st, M_TEMP);
 1259                 free(td_infos, M_TEMP);
 1260                 stacks_to_allocate *= 10;
 1261         }
 1262 
 1263         if (!finished && error == 0)
 1264                 error = ENOMEM;
 1265 
 1266         return (error);
 1267 }
 1268 #endif
 1269 
 1270 #ifdef SLEEPQUEUE_PROFILING
 1271 #define SLEEPQ_PROF_LOCATIONS   1024
 1272 #define SLEEPQ_SBUFSIZE         512
 1273 struct sleepq_prof {
 1274         LIST_ENTRY(sleepq_prof) sp_link;
 1275         const char      *sp_wmesg;
 1276         long            sp_count;
 1277 };
 1278 
 1279 LIST_HEAD(sqphead, sleepq_prof);
 1280 
 1281 struct sqphead sleepq_prof_free;
 1282 struct sqphead sleepq_hash[SC_TABLESIZE];
 1283 static struct sleepq_prof sleepq_profent[SLEEPQ_PROF_LOCATIONS];
 1284 static struct mtx sleepq_prof_lock;
 1285 MTX_SYSINIT(sleepq_prof_lock, &sleepq_prof_lock, "sleepq_prof", MTX_SPIN);
 1286 
 1287 static void
 1288 sleepq_profile(const char *wmesg)
 1289 {
 1290         struct sleepq_prof *sp;
 1291 
 1292         mtx_lock_spin(&sleepq_prof_lock);
 1293         if (prof_enabled == 0)
 1294                 goto unlock;
 1295         LIST_FOREACH(sp, &sleepq_hash[SC_HASH(wmesg)], sp_link)
 1296                 if (sp->sp_wmesg == wmesg)
 1297                         goto done;
 1298         sp = LIST_FIRST(&sleepq_prof_free);
 1299         if (sp == NULL)
 1300                 goto unlock;
 1301         sp->sp_wmesg = wmesg;
 1302         LIST_REMOVE(sp, sp_link);
 1303         LIST_INSERT_HEAD(&sleepq_hash[SC_HASH(wmesg)], sp, sp_link);
 1304 done:
 1305         sp->sp_count++;
 1306 unlock:
 1307         mtx_unlock_spin(&sleepq_prof_lock);
 1308         return;
 1309 }
 1310 
 1311 static void
 1312 sleepq_prof_reset(void)
 1313 {
 1314         struct sleepq_prof *sp;
 1315         int enabled;
 1316         int i;
 1317 
 1318         mtx_lock_spin(&sleepq_prof_lock);
 1319         enabled = prof_enabled;
 1320         prof_enabled = 0;
 1321         for (i = 0; i < SC_TABLESIZE; i++)
 1322                 LIST_INIT(&sleepq_hash[i]);
 1323         LIST_INIT(&sleepq_prof_free);
 1324         for (i = 0; i < SLEEPQ_PROF_LOCATIONS; i++) {
 1325                 sp = &sleepq_profent[i];
 1326                 sp->sp_wmesg = NULL;
 1327                 sp->sp_count = 0;
 1328                 LIST_INSERT_HEAD(&sleepq_prof_free, sp, sp_link);
 1329         }
 1330         prof_enabled = enabled;
 1331         mtx_unlock_spin(&sleepq_prof_lock);
 1332 }
 1333 
 1334 static int
 1335 enable_sleepq_prof(SYSCTL_HANDLER_ARGS)
 1336 {
 1337         int error, v;
 1338 
 1339         v = prof_enabled;
 1340         error = sysctl_handle_int(oidp, &v, v, req);
 1341         if (error)
 1342                 return (error);
 1343         if (req->newptr == NULL)
 1344                 return (error);
 1345         if (v == prof_enabled)
 1346                 return (0);
 1347         if (v == 1)
 1348                 sleepq_prof_reset();
 1349         mtx_lock_spin(&sleepq_prof_lock);
 1350         prof_enabled = !!v;
 1351         mtx_unlock_spin(&sleepq_prof_lock);
 1352 
 1353         return (0);
 1354 }
 1355 
 1356 static int
 1357 reset_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
 1358 {
 1359         int error, v;
 1360 
 1361         v = 0;
 1362         error = sysctl_handle_int(oidp, &v, 0, req);
 1363         if (error)
 1364                 return (error);
 1365         if (req->newptr == NULL)
 1366                 return (error);
 1367         if (v == 0)
 1368                 return (0);
 1369         sleepq_prof_reset();
 1370 
 1371         return (0);
 1372 }
 1373 
 1374 static int
 1375 dump_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
 1376 {
 1377         struct sleepq_prof *sp;
 1378         struct sbuf *sb;
 1379         int enabled;
 1380         int error;
 1381         int i;
 1382 
 1383         error = sysctl_wire_old_buffer(req, 0);
 1384         if (error != 0)
 1385                 return (error);
 1386         sb = sbuf_new_for_sysctl(NULL, NULL, SLEEPQ_SBUFSIZE, req);
 1387         sbuf_printf(sb, "\nwmesg\tcount\n");
 1388         enabled = prof_enabled;
 1389         mtx_lock_spin(&sleepq_prof_lock);
 1390         prof_enabled = 0;
 1391         mtx_unlock_spin(&sleepq_prof_lock);
 1392         for (i = 0; i < SC_TABLESIZE; i++) {
 1393                 LIST_FOREACH(sp, &sleepq_hash[i], sp_link) {
 1394                         sbuf_printf(sb, "%s\t%ld\n",
 1395                             sp->sp_wmesg, sp->sp_count);
 1396                 }
 1397         }
 1398         mtx_lock_spin(&sleepq_prof_lock);
 1399         prof_enabled = enabled;
 1400         mtx_unlock_spin(&sleepq_prof_lock);
 1401 
 1402         error = sbuf_finish(sb);
 1403         sbuf_delete(sb);
 1404         return (error);
 1405 }
 1406 
 1407 SYSCTL_PROC(_debug_sleepq, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
 1408     NULL, 0, dump_sleepq_prof_stats, "A", "Sleepqueue profiling statistics");
 1409 SYSCTL_PROC(_debug_sleepq, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW,
 1410     NULL, 0, reset_sleepq_prof_stats, "I",
 1411     "Reset sleepqueue profiling statistics");
 1412 SYSCTL_PROC(_debug_sleepq, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
 1413     NULL, 0, enable_sleepq_prof, "I", "Enable sleepqueue profiling");
 1414 #endif
 1415 
 1416 #ifdef DDB
 1417 DB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
 1418 {
 1419         struct sleepqueue_chain *sc;
 1420         struct sleepqueue *sq;
 1421 #ifdef INVARIANTS
 1422         struct lock_object *lock;
 1423 #endif
 1424         struct thread *td;
 1425         void *wchan;
 1426         int i;
 1427 
 1428         if (!have_addr)
 1429                 return;
 1430 
 1431         /*
 1432          * First, see if there is an active sleep queue for the wait channel
 1433          * indicated by the address.
 1434          */
 1435         wchan = (void *)addr;
 1436         sc = SC_LOOKUP(wchan);
 1437         LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
 1438                 if (sq->sq_wchan == wchan)
 1439                         goto found;
 1440 
 1441         /*
 1442          * Second, see if there is an active sleep queue at the address
 1443          * indicated.
 1444          */
 1445         for (i = 0; i < SC_TABLESIZE; i++)
 1446                 LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) {
 1447                         if (sq == (struct sleepqueue *)addr)
 1448                                 goto found;
 1449                 }
 1450 
 1451         db_printf("Unable to locate a sleep queue via %p\n", (void *)addr);
 1452         return;
 1453 found:
 1454         db_printf("Wait channel: %p\n", sq->sq_wchan);
 1455         db_printf("Queue type: %d\n", sq->sq_type);
 1456 #ifdef INVARIANTS
 1457         if (sq->sq_lock) {
 1458                 lock = sq->sq_lock;
 1459                 db_printf("Associated Interlock: %p - (%s) %s\n", lock,
 1460                     LOCK_CLASS(lock)->lc_name, lock->lo_name);
 1461         }
 1462 #endif
 1463         db_printf("Blocked threads:\n");
 1464         for (i = 0; i < NR_SLEEPQS; i++) {
 1465                 db_printf("\nQueue[%d]:\n", i);
 1466                 if (TAILQ_EMPTY(&sq->sq_blocked[i]))
 1467                         db_printf("\tempty\n");
 1468                 else
 1469                         TAILQ_FOREACH(td, &sq->sq_blocked[i],
 1470                                       td_slpq) {
 1471                                 db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td,
 1472                                           td->td_tid, td->td_proc->p_pid,
 1473                                           td->td_name);
 1474                         }
 1475                 db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]);
 1476         }
 1477 }
 1478 
 1479 /* Alias 'show sleepqueue' to 'show sleepq'. */
 1480 DB_SHOW_ALIAS(sleepqueue, db_show_sleepqueue);
 1481 #endif
