The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_sleepqueue.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 /*
   30  * Implementation of sleep queues used to hold a queue of threads blocked on
   31  * a wait channel.  Sleep queues are different from turnstiles in that wait
   32  * channels are not owned by anyone, so there is no priority propagation.
   33  * Sleep queues can also provide a timeout and can also be interrupted by
   34  * signals.  That said, there are several similarities between the turnstile
   35  * and sleep queue implementations.  (Note: turnstiles were implemented
   36  * first.)  For example, both use a hash table of the same size where each
   37  * bucket is referred to as a "chain" that contains both a spin lock and
   38  * a linked list of queues.  An individual queue is located by using a hash
   39  * to pick a chain, locking the chain, and then walking the chain searching
   40  * for the queue.  This means that a wait channel object does not need to
   41  * embed its queue head just as locks do not embed their turnstile queue
   42  * head.  Threads also carry around a sleep queue that they lend to the
   43  * wait channel when blocking.  Just as in turnstiles, the queue includes
   44  * a free list of the sleep queues of other threads blocked on the same
   45  * wait channel in the case of multiple waiters.
   46  *
   47  * Some additional functionality provided by sleep queues includes the
   48  * ability to set a timeout.  The timeout is managed using a per-thread
   49  * callout that resumes a thread if it is asleep.  A thread may also
   50  * catch signals while it is asleep (aka an interruptible sleep).  The
   51  * signal code uses sleepq_abort() to interrupt a sleeping thread.  Finally,
   52  * sleep queues also provide some extra assertions.  One is not allowed to
   53  * mix the sleep/wakeup and cv APIs for a given wait channel.  Also, one
   54  * must consistently use the same lock to synchronize with a wait channel,
   55  * though this check is currently only a warning for sleep/wakeup due to
   56  * pre-existing abuse of that API.  The same lock must also be held when
   57  * awakening threads, though that is currently only enforced for condition
   58  * variables.
   59  */
   60 
   61 #include <sys/cdefs.h>
   62 __FBSDID("$FreeBSD: releng/12.0/sys/kern/subr_sleepqueue.c 333853 2018-05-19 05:00:16Z mmacy $");
   63 
   64 #include "opt_sleepqueue_profiling.h"
   65 #include "opt_ddb.h"
   66 #include "opt_sched.h"
   67 #include "opt_stack.h"
   68 
   69 #include <sys/param.h>
   70 #include <sys/systm.h>
   71 #include <sys/lock.h>
   72 #include <sys/kernel.h>
   73 #include <sys/ktr.h>
   74 #include <sys/mutex.h>
   75 #include <sys/proc.h>
   76 #include <sys/sbuf.h>
   77 #include <sys/sched.h>
   78 #include <sys/sdt.h>
   79 #include <sys/signalvar.h>
   80 #include <sys/sleepqueue.h>
   81 #include <sys/stack.h>
   82 #include <sys/sysctl.h>
   83 #include <sys/time.h>
   84 
   85 #include <machine/atomic.h>
   86 
   87 #include <vm/uma.h>
   88 
   89 #ifdef DDB
   90 #include <ddb/ddb.h>
   91 #endif
   92 
   93 
   94 /*
   95  * Constants for the hash table of sleep queue chains.
   96  * SC_TABLESIZE must be a power of two for SC_MASK to work properly.
   97  */
   98 #ifndef SC_TABLESIZE
   99 #define SC_TABLESIZE    256
  100 #endif
  101 CTASSERT(powerof2(SC_TABLESIZE));
  102 #define SC_MASK         (SC_TABLESIZE - 1)
  103 #define SC_SHIFT        8
  104 #define SC_HASH(wc)     ((((uintptr_t)(wc) >> SC_SHIFT) ^ (uintptr_t)(wc)) & \
  105                             SC_MASK)
  106 #define SC_LOOKUP(wc)   &sleepq_chains[SC_HASH(wc)]
  107 #define NR_SLEEPQS      2
  108 /*
  109  * There are two different lists of sleep queues.  Both lists are connected
  110  * via the sq_hash entries.  The first list is the sleep queue chain list
  111  * that a sleep queue is on when it is attached to a wait channel.  The
  112  * second list is the free list hung off of a sleep queue that is attached
  113  * to a wait channel.
  114  *
  115  * Each sleep queue also contains the wait channel it is attached to, the
  116  * list of threads blocked on that wait channel, flags specific to the
  117  * wait channel, and the lock used to synchronize with a wait channel.
  118  * The flags are used to catch mismatches between the various consumers
  119  * of the sleep queue API (e.g. sleep/wakeup and condition variables).
  120  * The lock pointer is only used when invariants are enabled for various
  121  * debugging checks.
  122  *
  123  * Locking key:
  124  *  c - sleep queue chain lock
  125  */
  126 struct sleepqueue {
  127         TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS];    /* (c) Blocked threads. */
  128         u_int sq_blockedcnt[NR_SLEEPQS];        /* (c) N. of blocked threads. */
  129         LIST_ENTRY(sleepqueue) sq_hash;         /* (c) Chain and free list. */
  130         LIST_HEAD(, sleepqueue) sq_free;        /* (c) Free queues. */
  131         void    *sq_wchan;                      /* (c) Wait channel. */
  132         int     sq_type;                        /* (c) Queue type. */
  133 #ifdef INVARIANTS
  134         struct lock_object *sq_lock;            /* (c) Associated lock. */
  135 #endif
  136 };
  137 
  138 struct sleepqueue_chain {
  139         LIST_HEAD(, sleepqueue) sc_queues;      /* List of sleep queues. */
  140         struct mtx sc_lock;                     /* Spin lock for this chain. */
  141 #ifdef SLEEPQUEUE_PROFILING
  142         u_int   sc_depth;                       /* Length of sc_queues. */
  143         u_int   sc_max_depth;                   /* Max length of sc_queues. */
  144 #endif
  145 } __aligned(CACHE_LINE_SIZE);
  146 
  147 #ifdef SLEEPQUEUE_PROFILING
  148 u_int sleepq_max_depth;
  149 static SYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD, 0, "sleepq profiling");
  150 static SYSCTL_NODE(_debug_sleepq, OID_AUTO, chains, CTLFLAG_RD, 0,
  151     "sleepq chain stats");
  152 SYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth,
  153     0, "maxmimum depth achieved of a single chain");
  154 
  155 static void     sleepq_profile(const char *wmesg);
  156 static int      prof_enabled;
  157 #endif
  158 static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
  159 static uma_zone_t sleepq_zone;
  160 
  161 /*
  162  * Prototypes for non-exported routines.
  163  */
  164 static int      sleepq_catch_signals(void *wchan, int pri);
  165 static int      sleepq_check_signals(void);
  166 static int      sleepq_check_timeout(void);
  167 #ifdef INVARIANTS
  168 static void     sleepq_dtor(void *mem, int size, void *arg);
  169 #endif
  170 static int      sleepq_init(void *mem, int size, int flags);
  171 static int      sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
  172                     int pri);
  173 static void     sleepq_switch(void *wchan, int pri);
  174 static void     sleepq_timeout(void *arg);
  175 
  176 SDT_PROBE_DECLARE(sched, , , sleep);
  177 SDT_PROBE_DECLARE(sched, , , wakeup);
  178 
  179 /*
  180  * Initialize SLEEPQUEUE_PROFILING specific sysctl nodes.
  181  * Note that it must happen after sleepinit() has been fully executed, so
  182  * it must happen after SI_SUB_KMEM SYSINIT() subsystem setup.
  183  */
  184 #ifdef SLEEPQUEUE_PROFILING
  185 static void
  186 init_sleepqueue_profiling(void)
  187 {
  188         char chain_name[10];
  189         struct sysctl_oid *chain_oid;
  190         u_int i;
  191 
  192         for (i = 0; i < SC_TABLESIZE; i++) {
  193                 snprintf(chain_name, sizeof(chain_name), "%u", i);
  194                 chain_oid = SYSCTL_ADD_NODE(NULL,
  195                     SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO,
  196                     chain_name, CTLFLAG_RD, NULL, "sleepq chain stats");
  197                 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  198                     "depth", CTLFLAG_RD, &sleepq_chains[i].sc_depth, 0, NULL);
  199                 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
  200                     "max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0,
  201                     NULL);
  202         }
  203 }
  204 
  205 SYSINIT(sleepqueue_profiling, SI_SUB_LOCK, SI_ORDER_ANY,
  206     init_sleepqueue_profiling, NULL);
  207 #endif
  208 
  209 /*
  210  * Early initialization of sleep queues that is called from the sleepinit()
  211  * SYSINIT.
  212  */
  213 void
  214 init_sleepqueues(void)
  215 {
  216         int i;
  217 
  218         for (i = 0; i < SC_TABLESIZE; i++) {
  219                 LIST_INIT(&sleepq_chains[i].sc_queues);
  220                 mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
  221                     MTX_SPIN | MTX_RECURSE);
  222         }
  223         sleepq_zone = uma_zcreate("SLEEPQUEUE", sizeof(struct sleepqueue),
  224 #ifdef INVARIANTS
  225             NULL, sleepq_dtor, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
  226 #else
  227             NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0);
  228 #endif
  229 
  230         thread0.td_sleepqueue = sleepq_alloc();
  231 }
  232 
  233 /*
  234  * Get a sleep queue for a new thread.
  235  */
  236 struct sleepqueue *
  237 sleepq_alloc(void)
  238 {
  239 
  240         return (uma_zalloc(sleepq_zone, M_WAITOK));
  241 }
  242 
  243 /*
  244  * Free a sleep queue when a thread is destroyed.
  245  */
  246 void
  247 sleepq_free(struct sleepqueue *sq)
  248 {
  249 
  250         uma_zfree(sleepq_zone, sq);
  251 }
  252 
  253 /*
  254  * Lock the sleep queue chain associated with the specified wait channel.
  255  */
  256 void
  257 sleepq_lock(void *wchan)
  258 {
  259         struct sleepqueue_chain *sc;
  260 
  261         sc = SC_LOOKUP(wchan);
  262         mtx_lock_spin(&sc->sc_lock);
  263 }
  264 
  265 /*
  266  * Look up the sleep queue associated with a given wait channel in the hash
  267  * table locking the associated sleep queue chain.  If no queue is found in
  268  * the table, NULL is returned.
  269  */
  270 struct sleepqueue *
  271 sleepq_lookup(void *wchan)
  272 {
  273         struct sleepqueue_chain *sc;
  274         struct sleepqueue *sq;
  275 
  276         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  277         sc = SC_LOOKUP(wchan);
  278         mtx_assert(&sc->sc_lock, MA_OWNED);
  279         LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
  280                 if (sq->sq_wchan == wchan)
  281                         return (sq);
  282         return (NULL);
  283 }
  284 
  285 /*
  286  * Unlock the sleep queue chain associated with a given wait channel.
  287  */
  288 void
  289 sleepq_release(void *wchan)
  290 {
  291         struct sleepqueue_chain *sc;
  292 
  293         sc = SC_LOOKUP(wchan);
  294         mtx_unlock_spin(&sc->sc_lock);
  295 }
  296 
  297 /*
  298  * Places the current thread on the sleep queue for the specified wait
  299  * channel.  If INVARIANTS is enabled, then it associates the passed in
  300  * lock with the sleepq to make sure it is held when that sleep queue is
  301  * woken up.
  302  */
  303 void
  304 sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
  305     int queue)
  306 {
  307         struct sleepqueue_chain *sc;
  308         struct sleepqueue *sq;
  309         struct thread *td;
  310 
  311         td = curthread;
  312         sc = SC_LOOKUP(wchan);
  313         mtx_assert(&sc->sc_lock, MA_OWNED);
  314         MPASS(td->td_sleepqueue != NULL);
  315         MPASS(wchan != NULL);
  316         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  317 
  318         /* If this thread is not allowed to sleep, die a horrible death. */
  319         KASSERT(td->td_no_sleeping == 0,
  320             ("%s: td %p to sleep on wchan %p with sleeping prohibited",
  321             __func__, td, wchan));
  322 
  323         /* Look up the sleep queue associated with the wait channel 'wchan'. */
  324         sq = sleepq_lookup(wchan);
  325 
  326         /*
  327          * If the wait channel does not already have a sleep queue, use
  328          * this thread's sleep queue.  Otherwise, insert the current thread
  329          * into the sleep queue already in use by this wait channel.
  330          */
  331         if (sq == NULL) {
  332 #ifdef INVARIANTS
  333                 int i;
  334 
  335                 sq = td->td_sleepqueue;
  336                 for (i = 0; i < NR_SLEEPQS; i++) {
  337                         KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
  338                             ("thread's sleep queue %d is not empty", i));
  339                         KASSERT(sq->sq_blockedcnt[i] == 0,
  340                             ("thread's sleep queue %d count mismatches", i));
  341                 }
  342                 KASSERT(LIST_EMPTY(&sq->sq_free),
  343                     ("thread's sleep queue has a non-empty free list"));
  344                 KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
  345                 sq->sq_lock = lock;
  346 #endif
  347 #ifdef SLEEPQUEUE_PROFILING
  348                 sc->sc_depth++;
  349                 if (sc->sc_depth > sc->sc_max_depth) {
  350                         sc->sc_max_depth = sc->sc_depth;
  351                         if (sc->sc_max_depth > sleepq_max_depth)
  352                                 sleepq_max_depth = sc->sc_max_depth;
  353                 }
  354 #endif
  355                 sq = td->td_sleepqueue;
  356                 LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
  357                 sq->sq_wchan = wchan;
  358                 sq->sq_type = flags & SLEEPQ_TYPE;
  359         } else {
  360                 MPASS(wchan == sq->sq_wchan);
  361                 MPASS(lock == sq->sq_lock);
  362                 MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
  363                 LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
  364         }
  365         thread_lock(td);
  366         TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
  367         sq->sq_blockedcnt[queue]++;
  368         td->td_sleepqueue = NULL;
  369         td->td_sqqueue = queue;
  370         td->td_wchan = wchan;
  371         td->td_wmesg = wmesg;
  372         if (flags & SLEEPQ_INTERRUPTIBLE) {
  373                 td->td_flags |= TDF_SINTR;
  374                 td->td_flags &= ~TDF_SLEEPABORT;
  375         }
  376         thread_unlock(td);
  377 }
  378 
  379 /*
  380  * Sets a timeout that will remove the current thread from the specified
  381  * sleep queue after timo ticks if the thread has not already been awakened.
  382  */
  383 void
  384 sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
  385     int flags)
  386 {
  387         struct sleepqueue_chain *sc __unused;
  388         struct thread *td;
  389         sbintime_t pr1;
  390 
  391         td = curthread;
  392         sc = SC_LOOKUP(wchan);
  393         mtx_assert(&sc->sc_lock, MA_OWNED);
  394         MPASS(TD_ON_SLEEPQ(td));
  395         MPASS(td->td_sleepqueue == NULL);
  396         MPASS(wchan != NULL);
  397         if (cold && td == &thread0)
  398                 panic("timed sleep before timers are working");
  399         KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx",
  400             td->td_tid, td, (uintmax_t)td->td_sleeptimo));
  401         thread_lock(td);
  402         callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1);
  403         thread_unlock(td);
  404         callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1,
  405             sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC |
  406             C_DIRECT_EXEC);
  407 }
  408 
  409 /*
  410  * Return the number of actual sleepers for the specified queue.
  411  */
  412 u_int
  413 sleepq_sleepcnt(void *wchan, int queue)
  414 {
  415         struct sleepqueue *sq;
  416 
  417         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  418         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  419         sq = sleepq_lookup(wchan);
  420         if (sq == NULL)
  421                 return (0);
  422         return (sq->sq_blockedcnt[queue]);
  423 }
  424 
  425 /*
  426  * Marks the pending sleep of the current thread as interruptible and
  427  * makes an initial check for pending signals before putting a thread
  428  * to sleep. Enters and exits with the thread lock held.  Thread lock
  429  * may have transitioned from the sleepq lock to a run lock.
  430  */
  431 static int
  432 sleepq_catch_signals(void *wchan, int pri)
  433 {
  434         struct sleepqueue_chain *sc;
  435         struct sleepqueue *sq;
  436         struct thread *td;
  437         struct proc *p;
  438         struct sigacts *ps;
  439         int sig, ret;
  440 
  441         ret = 0;
  442         td = curthread;
  443         p = curproc;
  444         sc = SC_LOOKUP(wchan);
  445         mtx_assert(&sc->sc_lock, MA_OWNED);
  446         MPASS(wchan != NULL);
  447         if ((td->td_pflags & TDP_WAKEUP) != 0) {
  448                 td->td_pflags &= ~TDP_WAKEUP;
  449                 ret = EINTR;
  450                 thread_lock(td);
  451                 goto out;
  452         }
  453 
  454         /*
  455          * See if there are any pending signals or suspension requests for this
  456          * thread.  If not, we can switch immediately.
  457          */
  458         thread_lock(td);
  459         if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) != 0) {
  460                 thread_unlock(td);
  461                 mtx_unlock_spin(&sc->sc_lock);
  462                 CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)",
  463                         (void *)td, (long)p->p_pid, td->td_name);
  464                 PROC_LOCK(p);
  465                 /*
  466                  * Check for suspension first. Checking for signals and then
  467                  * suspending could result in a missed signal, since a signal
  468                  * can be delivered while this thread is suspended.
  469                  */
  470                 if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) {
  471                         ret = thread_suspend_check(1);
  472                         MPASS(ret == 0 || ret == EINTR || ret == ERESTART);
  473                         if (ret != 0) {
  474                                 PROC_UNLOCK(p);
  475                                 mtx_lock_spin(&sc->sc_lock);
  476                                 thread_lock(td);
  477                                 goto out;
  478                         }
  479                 }
  480                 if ((td->td_flags & TDF_NEEDSIGCHK) != 0) {
  481                         ps = p->p_sigacts;
  482                         mtx_lock(&ps->ps_mtx);
  483                         sig = cursig(td);
  484                         if (sig == -1) {
  485                                 mtx_unlock(&ps->ps_mtx);
  486                                 KASSERT((td->td_flags & TDF_SBDRY) != 0,
  487                                     ("lost TDF_SBDRY"));
  488                                 KASSERT(TD_SBDRY_INTR(td),
  489                                     ("lost TDF_SERESTART of TDF_SEINTR"));
  490                                 KASSERT((td->td_flags &
  491                                     (TDF_SEINTR | TDF_SERESTART)) !=
  492                                     (TDF_SEINTR | TDF_SERESTART),
  493                                     ("both TDF_SEINTR and TDF_SERESTART"));
  494                                 ret = TD_SBDRY_ERRNO(td);
  495                         } else if (sig != 0) {
  496                                 ret = SIGISMEMBER(ps->ps_sigintr, sig) ?
  497                                     EINTR : ERESTART;
  498                                 mtx_unlock(&ps->ps_mtx);
  499                         } else {
  500                                 mtx_unlock(&ps->ps_mtx);
  501                         }
  502                 }
  503                 /*
  504                  * Lock the per-process spinlock prior to dropping the PROC_LOCK
  505                  * to avoid a signal delivery race.  PROC_LOCK, PROC_SLOCK, and
  506                  * thread_lock() are currently held in tdsendsignal().
  507                  */
  508                 PROC_SLOCK(p);
  509                 mtx_lock_spin(&sc->sc_lock);
  510                 PROC_UNLOCK(p);
  511                 thread_lock(td);
  512                 PROC_SUNLOCK(p);
  513         }
  514         if (ret == 0) {
  515                 sleepq_switch(wchan, pri);
  516                 return (0);
  517         }
  518 out:
  519         /*
  520          * There were pending signals and this thread is still
  521          * on the sleep queue, remove it from the sleep queue.
  522          */
  523         if (TD_ON_SLEEPQ(td)) {
  524                 sq = sleepq_lookup(wchan);
  525                 if (sleepq_resume_thread(sq, td, 0)) {
  526 #ifdef INVARIANTS
  527                         /*
  528                          * This thread hasn't gone to sleep yet, so it
  529                          * should not be swapped out.
  530                          */
  531                         panic("not waking up swapper");
  532 #endif
  533                 }
  534         }
  535         mtx_unlock_spin(&sc->sc_lock);
  536         MPASS(td->td_lock != &sc->sc_lock);
  537         return (ret);
  538 }
  539 
  540 /*
  541  * Switches to another thread if we are still asleep on a sleep queue.
  542  * Returns with thread lock.
  543  */
  544 static void
  545 sleepq_switch(void *wchan, int pri)
  546 {
  547         struct sleepqueue_chain *sc;
  548         struct sleepqueue *sq;
  549         struct thread *td;
  550         bool rtc_changed;
  551 
  552         td = curthread;
  553         sc = SC_LOOKUP(wchan);
  554         mtx_assert(&sc->sc_lock, MA_OWNED);
  555         THREAD_LOCK_ASSERT(td, MA_OWNED);
  556 
  557         /*
  558          * If we have a sleep queue, then we've already been woken up, so
  559          * just return.
  560          */
  561         if (td->td_sleepqueue != NULL) {
  562                 mtx_unlock_spin(&sc->sc_lock);
  563                 return;
  564         }
  565 
  566         /*
  567          * If TDF_TIMEOUT is set, then our sleep has been timed out
  568          * already but we are still on the sleep queue, so dequeue the
  569          * thread and return.
  570          *
  571          * Do the same if the real-time clock has been adjusted since this
  572          * thread calculated its timeout based on that clock.  This handles
  573          * the following race:
  574          * - The Ts thread needs to sleep until an absolute real-clock time.
  575          *   It copies the global rtc_generation into curthread->td_rtcgen,
  576          *   reads the RTC, and calculates a sleep duration based on that time.
  577          *   See umtxq_sleep() for an example.
  578          * - The Tc thread adjusts the RTC, bumps rtc_generation, and wakes
  579          *   threads that are sleeping until an absolute real-clock time.
  580          *   See tc_setclock() and the POSIX specification of clock_settime().
  581          * - Ts reaches the code below.  It holds the sleepqueue chain lock,
  582          *   so Tc has finished waking, so this thread must test td_rtcgen.
  583          * (The declaration of td_rtcgen refers to this comment.)
  584          */
  585         rtc_changed = td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation;
  586         if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) {
  587                 if (rtc_changed) {
  588                         td->td_rtcgen = 0;
  589                 }
  590                 MPASS(TD_ON_SLEEPQ(td));
  591                 sq = sleepq_lookup(wchan);
  592                 if (sleepq_resume_thread(sq, td, 0)) {
  593 #ifdef INVARIANTS
  594                         /*
  595                          * This thread hasn't gone to sleep yet, so it
  596                          * should not be swapped out.
  597                          */
  598                         panic("not waking up swapper");
  599 #endif
  600                 }
  601                 mtx_unlock_spin(&sc->sc_lock);
  602                 return;
  603         }
  604 #ifdef SLEEPQUEUE_PROFILING
  605         if (prof_enabled)
  606                 sleepq_profile(td->td_wmesg);
  607 #endif
  608         MPASS(td->td_sleepqueue == NULL);
  609         sched_sleep(td, pri);
  610         thread_lock_set(td, &sc->sc_lock);
  611         SDT_PROBE0(sched, , , sleep);
  612         TD_SET_SLEEPING(td);
  613         mi_switch(SW_VOL | SWT_SLEEPQ, NULL);
  614         KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
  615         CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
  616             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
  617 }
  618 
  619 /*
  620  * Check to see if we timed out.
  621  */
  622 static int
  623 sleepq_check_timeout(void)
  624 {
  625         struct thread *td;
  626         int res;
  627 
  628         td = curthread;
  629         THREAD_LOCK_ASSERT(td, MA_OWNED);
  630 
  631         /*
  632          * If TDF_TIMEOUT is set, we timed out.  But recheck
  633          * td_sleeptimo anyway.
  634          */
  635         res = 0;
  636         if (td->td_sleeptimo != 0) {
  637                 if (td->td_sleeptimo <= sbinuptime())
  638                         res = EWOULDBLOCK;
  639                 td->td_sleeptimo = 0;
  640         }
  641         if (td->td_flags & TDF_TIMEOUT)
  642                 td->td_flags &= ~TDF_TIMEOUT;
  643         else
  644                 /*
  645                  * We ignore the situation where timeout subsystem was
  646                  * unable to stop our callout.  The struct thread is
  647                  * type-stable, the callout will use the correct
  648                  * memory when running.  The checks of the
  649                  * td_sleeptimo value in this function and in
  650                  * sleepq_timeout() ensure that the thread does not
  651                  * get spurious wakeups, even if the callout was reset
  652                  * or thread reused.
  653                  */
  654                 callout_stop(&td->td_slpcallout);
  655         return (res);
  656 }
  657 
  658 /*
  659  * Check to see if we were awoken by a signal.
  660  */
  661 static int
  662 sleepq_check_signals(void)
  663 {
  664         struct thread *td;
  665 
  666         td = curthread;
  667         THREAD_LOCK_ASSERT(td, MA_OWNED);
  668 
  669         /* We are no longer in an interruptible sleep. */
  670         if (td->td_flags & TDF_SINTR)
  671                 td->td_flags &= ~TDF_SINTR;
  672 
  673         if (td->td_flags & TDF_SLEEPABORT) {
  674                 td->td_flags &= ~TDF_SLEEPABORT;
  675                 return (td->td_intrval);
  676         }
  677 
  678         return (0);
  679 }
  680 
  681 /*
  682  * Block the current thread until it is awakened from its sleep queue.
  683  */
  684 void
  685 sleepq_wait(void *wchan, int pri)
  686 {
  687         struct thread *td;
  688 
  689         td = curthread;
  690         MPASS(!(td->td_flags & TDF_SINTR));
  691         thread_lock(td);
  692         sleepq_switch(wchan, pri);
  693         thread_unlock(td);
  694 }
  695 
  696 /*
  697  * Block the current thread until it is awakened from its sleep queue
  698  * or it is interrupted by a signal.
  699  */
  700 int
  701 sleepq_wait_sig(void *wchan, int pri)
  702 {
  703         int rcatch;
  704         int rval;
  705 
  706         rcatch = sleepq_catch_signals(wchan, pri);
  707         rval = sleepq_check_signals();
  708         thread_unlock(curthread);
  709         if (rcatch)
  710                 return (rcatch);
  711         return (rval);
  712 }
  713 
  714 /*
  715  * Block the current thread until it is awakened from its sleep queue
  716  * or it times out while waiting.
  717  */
  718 int
  719 sleepq_timedwait(void *wchan, int pri)
  720 {
  721         struct thread *td;
  722         int rval;
  723 
  724         td = curthread;
  725         MPASS(!(td->td_flags & TDF_SINTR));
  726         thread_lock(td);
  727         sleepq_switch(wchan, pri);
  728         rval = sleepq_check_timeout();
  729         thread_unlock(td);
  730 
  731         return (rval);
  732 }
  733 
  734 /*
  735  * Block the current thread until it is awakened from its sleep queue,
  736  * it is interrupted by a signal, or it times out waiting to be awakened.
  737  */
  738 int
  739 sleepq_timedwait_sig(void *wchan, int pri)
  740 {
  741         int rcatch, rvalt, rvals;
  742 
  743         rcatch = sleepq_catch_signals(wchan, pri);
  744         rvalt = sleepq_check_timeout();
  745         rvals = sleepq_check_signals();
  746         thread_unlock(curthread);
  747         if (rcatch)
  748                 return (rcatch);
  749         if (rvals)
  750                 return (rvals);
  751         return (rvalt);
  752 }
  753 
  754 /*
  755  * Returns the type of sleepqueue given a waitchannel.
  756  */
  757 int
  758 sleepq_type(void *wchan)
  759 {
  760         struct sleepqueue *sq;
  761         int type;
  762 
  763         MPASS(wchan != NULL);
  764 
  765         sleepq_lock(wchan);
  766         sq = sleepq_lookup(wchan);
  767         if (sq == NULL) {
  768                 sleepq_release(wchan);
  769                 return (-1);
  770         }
  771         type = sq->sq_type;
  772         sleepq_release(wchan);
  773         return (type);
  774 }
  775 
  776 /*
  777  * Removes a thread from a sleep queue and makes it
  778  * runnable.
  779  */
  780 static int
  781 sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri)
  782 {
  783         struct sleepqueue_chain *sc __unused;
  784 
  785         MPASS(td != NULL);
  786         MPASS(sq->sq_wchan != NULL);
  787         MPASS(td->td_wchan == sq->sq_wchan);
  788         MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0);
  789         THREAD_LOCK_ASSERT(td, MA_OWNED);
  790         sc = SC_LOOKUP(sq->sq_wchan);
  791         mtx_assert(&sc->sc_lock, MA_OWNED);
  792 
  793         SDT_PROBE2(sched, , , wakeup, td, td->td_proc);
  794 
  795         /* Remove the thread from the queue. */
  796         sq->sq_blockedcnt[td->td_sqqueue]--;
  797         TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
  798 
  799         /*
  800          * Get a sleep queue for this thread.  If this is the last waiter,
  801          * use the queue itself and take it out of the chain, otherwise,
  802          * remove a queue from the free list.
  803          */
  804         if (LIST_EMPTY(&sq->sq_free)) {
  805                 td->td_sleepqueue = sq;
  806 #ifdef INVARIANTS
  807                 sq->sq_wchan = NULL;
  808 #endif
  809 #ifdef SLEEPQUEUE_PROFILING
  810                 sc->sc_depth--;
  811 #endif
  812         } else
  813                 td->td_sleepqueue = LIST_FIRST(&sq->sq_free);
  814         LIST_REMOVE(td->td_sleepqueue, sq_hash);
  815 
  816         td->td_wmesg = NULL;
  817         td->td_wchan = NULL;
  818         td->td_flags &= ~TDF_SINTR;
  819 
  820         CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)",
  821             (void *)td, (long)td->td_proc->p_pid, td->td_name);
  822 
  823         /* Adjust priority if requested. */
  824         MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
  825         if (pri != 0 && td->td_priority > pri &&
  826             PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
  827                 sched_prio(td, pri);
  828 
  829         /*
  830          * Note that thread td might not be sleeping if it is running
  831          * sleepq_catch_signals() on another CPU or is blocked on its
  832          * proc lock to check signals.  There's no need to mark the
  833          * thread runnable in that case.
  834          */
  835         if (TD_IS_SLEEPING(td)) {
  836                 TD_CLR_SLEEPING(td);
  837                 return (setrunnable(td));
  838         }
  839         return (0);
  840 }
  841 
  842 #ifdef INVARIANTS
  843 /*
  844  * UMA zone item deallocator.
  845  */
  846 static void
  847 sleepq_dtor(void *mem, int size, void *arg)
  848 {
  849         struct sleepqueue *sq;
  850         int i;
  851 
  852         sq = mem;
  853         for (i = 0; i < NR_SLEEPQS; i++) {
  854                 MPASS(TAILQ_EMPTY(&sq->sq_blocked[i]));
  855                 MPASS(sq->sq_blockedcnt[i] == 0);
  856         }
  857 }
  858 #endif
  859 
  860 /*
  861  * UMA zone item initializer.
  862  */
  863 static int
  864 sleepq_init(void *mem, int size, int flags)
  865 {
  866         struct sleepqueue *sq;
  867         int i;
  868 
  869         bzero(mem, size);
  870         sq = mem;
  871         for (i = 0; i < NR_SLEEPQS; i++) {
  872                 TAILQ_INIT(&sq->sq_blocked[i]);
  873                 sq->sq_blockedcnt[i] = 0;
  874         }
  875         LIST_INIT(&sq->sq_free);
  876         return (0);
  877 }
  878 
  879 /*
  880  * Find the highest priority thread sleeping on a wait channel and resume it.
  881  */
  882 int
  883 sleepq_signal(void *wchan, int flags, int pri, int queue)
  884 {
  885         struct sleepqueue *sq;
  886         struct thread *td, *besttd;
  887         int wakeup_swapper;
  888 
  889         CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
  890         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  891         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  892         sq = sleepq_lookup(wchan);
  893         if (sq == NULL)
  894                 return (0);
  895         KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
  896             ("%s: mismatch between sleep/wakeup and cv_*", __func__));
  897 
  898         /*
  899          * Find the highest priority thread on the queue.  If there is a
  900          * tie, use the thread that first appears in the queue as it has
  901          * been sleeping the longest since threads are always added to
  902          * the tail of sleep queues.
  903          */
  904         besttd = TAILQ_FIRST(&sq->sq_blocked[queue]);
  905         TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) {
  906                 if (td->td_priority < besttd->td_priority)
  907                         besttd = td;
  908         }
  909         MPASS(besttd != NULL);
  910         thread_lock(besttd);
  911         wakeup_swapper = sleepq_resume_thread(sq, besttd, pri);
  912         thread_unlock(besttd);
  913         return (wakeup_swapper);
  914 }
  915 
  916 static bool
  917 match_any(struct thread *td __unused)
  918 {
  919 
  920         return (true);
  921 }
  922 
  923 /*
  924  * Resume all threads sleeping on a specified wait channel.
  925  */
  926 int
  927 sleepq_broadcast(void *wchan, int flags, int pri, int queue)
  928 {
  929         struct sleepqueue *sq;
  930 
  931         CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
  932         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
  933         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
  934         sq = sleepq_lookup(wchan);
  935         if (sq == NULL)
  936                 return (0);
  937         KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
  938             ("%s: mismatch between sleep/wakeup and cv_*", __func__));
  939 
  940         return (sleepq_remove_matching(sq, queue, match_any, pri));
  941 }
  942 
  943 /*
  944  * Resume threads on the sleep queue that match the given predicate.
  945  */
  946 int
  947 sleepq_remove_matching(struct sleepqueue *sq, int queue,
  948     bool (*matches)(struct thread *), int pri)
  949 {
  950         struct thread *td, *tdn;
  951         int wakeup_swapper;
  952 
  953         /*
  954          * The last thread will be given ownership of sq and may
  955          * re-enqueue itself before sleepq_resume_thread() returns,
  956          * so we must cache the "next" queue item at the beginning
  957          * of the final iteration.
  958          */
  959         wakeup_swapper = 0;
  960         TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) {
  961                 thread_lock(td);
  962                 if (matches(td))
  963                         wakeup_swapper |= sleepq_resume_thread(sq, td, pri);
  964                 thread_unlock(td);
  965         }
  966 
  967         return (wakeup_swapper);
  968 }
  969 
  970 /*
  971  * Time sleeping threads out.  When the timeout expires, the thread is
  972  * removed from the sleep queue and made runnable if it is still asleep.
  973  */
  974 static void
  975 sleepq_timeout(void *arg)
  976 {
  977         struct sleepqueue_chain *sc __unused;
  978         struct sleepqueue *sq;
  979         struct thread *td;
  980         void *wchan;
  981         int wakeup_swapper;
  982 
  983         td = arg;
  984         wakeup_swapper = 0;
  985         CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
  986             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
  987 
  988         thread_lock(td);
  989 
  990         if (td->td_sleeptimo > sbinuptime() || td->td_sleeptimo == 0) {
  991                 /*
  992                  * The thread does not want a timeout (yet).
  993                  */
  994         } else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
  995                 /*
  996                  * See if the thread is asleep and get the wait
  997                  * channel if it is.
  998                  */
  999                 wchan = td->td_wchan;
 1000                 sc = SC_LOOKUP(wchan);
 1001                 THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
 1002                 sq = sleepq_lookup(wchan);
 1003                 MPASS(sq != NULL);
 1004                 td->td_flags |= TDF_TIMEOUT;
 1005                 wakeup_swapper = sleepq_resume_thread(sq, td, 0);
 1006         } else if (TD_ON_SLEEPQ(td)) {
 1007                 /*
 1008                  * If the thread is on the SLEEPQ but isn't sleeping
 1009                  * yet, it can either be on another CPU in between
 1010                  * sleepq_add() and one of the sleepq_*wait*()
 1011                  * routines or it can be in sleepq_catch_signals().
 1012                  */
 1013                 td->td_flags |= TDF_TIMEOUT;
 1014         }
 1015 
 1016         thread_unlock(td);
 1017         if (wakeup_swapper)
 1018                 kick_proc0();
 1019 }
 1020 
 1021 /*
 1022  * Resumes a specific thread from the sleep queue associated with a specific
 1023  * wait channel if it is on that queue.
 1024  */
 1025 void
 1026 sleepq_remove(struct thread *td, void *wchan)
 1027 {
 1028         struct sleepqueue *sq;
 1029         int wakeup_swapper;
 1030 
 1031         /*
 1032          * Look up the sleep queue for this wait channel, then re-check
 1033          * that the thread is asleep on that channel, if it is not, then
 1034          * bail.
 1035          */
 1036         MPASS(wchan != NULL);
 1037         sleepq_lock(wchan);
 1038         sq = sleepq_lookup(wchan);
 1039         /*
 1040          * We can not lock the thread here as it may be sleeping on a
 1041          * different sleepq.  However, holding the sleepq lock for this
 1042          * wchan can guarantee that we do not miss a wakeup for this
 1043          * channel.  The asserts below will catch any false positives.
 1044          */
 1045         if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) {
 1046                 sleepq_release(wchan);
 1047                 return;
 1048         }
 1049         /* Thread is asleep on sleep queue sq, so wake it up. */
 1050         thread_lock(td);
 1051         MPASS(sq != NULL);
 1052         MPASS(td->td_wchan == wchan);
 1053         wakeup_swapper = sleepq_resume_thread(sq, td, 0);
 1054         thread_unlock(td);
 1055         sleepq_release(wchan);
 1056         if (wakeup_swapper)
 1057                 kick_proc0();
 1058 }
 1059 
 1060 /*
 1061  * Abort a thread as if an interrupt had occurred.  Only abort
 1062  * interruptible waits (unfortunately it isn't safe to abort others).
 1063  */
 1064 int
 1065 sleepq_abort(struct thread *td, int intrval)
 1066 {
 1067         struct sleepqueue *sq;
 1068         void *wchan;
 1069 
 1070         THREAD_LOCK_ASSERT(td, MA_OWNED);
 1071         MPASS(TD_ON_SLEEPQ(td));
 1072         MPASS(td->td_flags & TDF_SINTR);
 1073         MPASS(intrval == EINTR || intrval == ERESTART);
 1074 
 1075         /*
 1076          * If the TDF_TIMEOUT flag is set, just leave. A
 1077          * timeout is scheduled anyhow.
 1078          */
 1079         if (td->td_flags & TDF_TIMEOUT)
 1080                 return (0);
 1081 
 1082         CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)",
 1083             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
 1084         td->td_intrval = intrval;
 1085         td->td_flags |= TDF_SLEEPABORT;
 1086         /*
 1087          * If the thread has not slept yet it will find the signal in
 1088          * sleepq_catch_signals() and call sleepq_resume_thread.  Otherwise
 1089          * we have to do it here.
 1090          */
 1091         if (!TD_IS_SLEEPING(td))
 1092                 return (0);
 1093         wchan = td->td_wchan;
 1094         MPASS(wchan != NULL);
 1095         sq = sleepq_lookup(wchan);
 1096         MPASS(sq != NULL);
 1097 
 1098         /* Thread is asleep on sleep queue sq, so wake it up. */
 1099         return (sleepq_resume_thread(sq, td, 0));
 1100 }
 1101 
 1102 void
 1103 sleepq_chains_remove_matching(bool (*matches)(struct thread *))
 1104 {
 1105         struct sleepqueue_chain *sc;
 1106         struct sleepqueue *sq, *sq1;
 1107         int i, wakeup_swapper;
 1108 
 1109         wakeup_swapper = 0;
 1110         for (sc = &sleepq_chains[0]; sc < sleepq_chains + SC_TABLESIZE; ++sc) {
 1111                 if (LIST_EMPTY(&sc->sc_queues)) {
 1112                         continue;
 1113                 }
 1114                 mtx_lock_spin(&sc->sc_lock);
 1115                 LIST_FOREACH_SAFE(sq, &sc->sc_queues, sq_hash, sq1) {
 1116                         for (i = 0; i < NR_SLEEPQS; ++i) {
 1117                                 wakeup_swapper |= sleepq_remove_matching(sq, i,
 1118                                     matches, 0);
 1119                         }
 1120                 }
 1121                 mtx_unlock_spin(&sc->sc_lock);
 1122         }
 1123         if (wakeup_swapper) {
 1124                 kick_proc0();
 1125         }
 1126 }
 1127 
 1128 /*
 1129  * Prints the stacks of all threads presently sleeping on wchan/queue to
 1130  * the sbuf sb.  Sets count_stacks_printed to the number of stacks actually
 1131  * printed.  Typically, this will equal the number of threads sleeping on the
 1132  * queue, but may be less if sb overflowed before all stacks were printed.
 1133  */
 1134 #ifdef STACK
 1135 int
 1136 sleepq_sbuf_print_stacks(struct sbuf *sb, void *wchan, int queue,
 1137     int *count_stacks_printed)
 1138 {
 1139         struct thread *td, *td_next;
 1140         struct sleepqueue *sq;
 1141         struct stack **st;
 1142         struct sbuf **td_infos;
 1143         int i, stack_idx, error, stacks_to_allocate;
 1144         bool finished;
 1145 
 1146         error = 0;
 1147         finished = false;
 1148 
 1149         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
 1150         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
 1151 
 1152         stacks_to_allocate = 10;
 1153         for (i = 0; i < 3 && !finished ; i++) {
 1154                 /* We cannot malloc while holding the queue's spinlock, so
 1155                  * we do our mallocs now, and hope it is enough.  If it
 1156                  * isn't, we will free these, drop the lock, malloc more,
 1157                  * and try again, up to a point.  After that point we will
 1158                  * give up and report ENOMEM. We also cannot write to sb
 1159                  * during this time since the client may have set the
 1160                  * SBUF_AUTOEXTEND flag on their sbuf, which could cause a
 1161                  * malloc as we print to it.  So we defer actually printing
 1162                  * to sb until after we drop the spinlock.
 1163                  */
 1164 
 1165                 /* Where we will store the stacks. */
 1166                 st = malloc(sizeof(struct stack *) * stacks_to_allocate,
 1167                     M_TEMP, M_WAITOK);
 1168                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1169                     stack_idx++)
 1170                         st[stack_idx] = stack_create(M_WAITOK);
 1171 
 1172                 /* Where we will store the td name, tid, etc. */
 1173                 td_infos = malloc(sizeof(struct sbuf *) * stacks_to_allocate,
 1174                     M_TEMP, M_WAITOK);
 1175                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1176                     stack_idx++)
 1177                         td_infos[stack_idx] = sbuf_new(NULL, NULL,
 1178                             MAXCOMLEN + sizeof(struct thread *) * 2 + 40,
 1179                             SBUF_FIXEDLEN);
 1180 
 1181                 sleepq_lock(wchan);
 1182                 sq = sleepq_lookup(wchan);
 1183                 if (sq == NULL) {
 1184                         /* This sleepq does not exist; exit and return ENOENT. */
 1185                         error = ENOENT;
 1186                         finished = true;
 1187                         sleepq_release(wchan);
 1188                         goto loop_end;
 1189                 }
 1190 
 1191                 stack_idx = 0;
 1192                 /* Save thread info */
 1193                 TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq,
 1194                     td_next) {
 1195                         if (stack_idx >= stacks_to_allocate)
 1196                                 goto loop_end;
 1197 
 1198                         /* Note the td_lock is equal to the sleepq_lock here. */
 1199                         stack_save_td(st[stack_idx], td);
 1200 
 1201                         sbuf_printf(td_infos[stack_idx], "%d: %s %p",
 1202                             td->td_tid, td->td_name, td);
 1203 
 1204                         ++stack_idx;
 1205                 }
 1206 
 1207                 finished = true;
 1208                 sleepq_release(wchan);
 1209 
 1210                 /* Print the stacks */
 1211                 for (i = 0; i < stack_idx; i++) {
 1212                         sbuf_finish(td_infos[i]);
 1213                         sbuf_printf(sb, "--- thread %s: ---\n", sbuf_data(td_infos[i]));
 1214                         stack_sbuf_print(sb, st[i]);
 1215                         sbuf_printf(sb, "\n");
 1216 
 1217                         error = sbuf_error(sb);
 1218                         if (error == 0)
 1219                                 *count_stacks_printed = stack_idx;
 1220                 }
 1221 
 1222 loop_end:
 1223                 if (!finished)
 1224                         sleepq_release(wchan);
 1225                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1226                     stack_idx++)
 1227                         stack_destroy(st[stack_idx]);
 1228                 for (stack_idx = 0; stack_idx < stacks_to_allocate;
 1229                     stack_idx++)
 1230                         sbuf_delete(td_infos[stack_idx]);
 1231                 free(st, M_TEMP);
 1232                 free(td_infos, M_TEMP);
 1233                 stacks_to_allocate *= 10;
 1234         }
 1235 
 1236         if (!finished && error == 0)
 1237                 error = ENOMEM;
 1238 
 1239         return (error);
 1240 }
 1241 #endif
 1242 
 1243 #ifdef SLEEPQUEUE_PROFILING
 1244 #define SLEEPQ_PROF_LOCATIONS   1024
 1245 #define SLEEPQ_SBUFSIZE         512
 1246 struct sleepq_prof {
 1247         LIST_ENTRY(sleepq_prof) sp_link;
 1248         const char      *sp_wmesg;
 1249         long            sp_count;
 1250 };
 1251 
 1252 LIST_HEAD(sqphead, sleepq_prof);
 1253 
 1254 struct sqphead sleepq_prof_free;
 1255 struct sqphead sleepq_hash[SC_TABLESIZE];
 1256 static struct sleepq_prof sleepq_profent[SLEEPQ_PROF_LOCATIONS];
 1257 static struct mtx sleepq_prof_lock;
 1258 MTX_SYSINIT(sleepq_prof_lock, &sleepq_prof_lock, "sleepq_prof", MTX_SPIN);
 1259 
 1260 static void
 1261 sleepq_profile(const char *wmesg)
 1262 {
 1263         struct sleepq_prof *sp;
 1264 
 1265         mtx_lock_spin(&sleepq_prof_lock);
 1266         if (prof_enabled == 0)
 1267                 goto unlock;
 1268         LIST_FOREACH(sp, &sleepq_hash[SC_HASH(wmesg)], sp_link)
 1269                 if (sp->sp_wmesg == wmesg)
 1270                         goto done;
 1271         sp = LIST_FIRST(&sleepq_prof_free);
 1272         if (sp == NULL)
 1273                 goto unlock;
 1274         sp->sp_wmesg = wmesg;
 1275         LIST_REMOVE(sp, sp_link);
 1276         LIST_INSERT_HEAD(&sleepq_hash[SC_HASH(wmesg)], sp, sp_link);
 1277 done:
 1278         sp->sp_count++;
 1279 unlock:
 1280         mtx_unlock_spin(&sleepq_prof_lock);
 1281         return;
 1282 }
 1283 
 1284 static void
 1285 sleepq_prof_reset(void)
 1286 {
 1287         struct sleepq_prof *sp;
 1288         int enabled;
 1289         int i;
 1290 
 1291         mtx_lock_spin(&sleepq_prof_lock);
 1292         enabled = prof_enabled;
 1293         prof_enabled = 0;
 1294         for (i = 0; i < SC_TABLESIZE; i++)
 1295                 LIST_INIT(&sleepq_hash[i]);
 1296         LIST_INIT(&sleepq_prof_free);
 1297         for (i = 0; i < SLEEPQ_PROF_LOCATIONS; i++) {
 1298                 sp = &sleepq_profent[i];
 1299                 sp->sp_wmesg = NULL;
 1300                 sp->sp_count = 0;
 1301                 LIST_INSERT_HEAD(&sleepq_prof_free, sp, sp_link);
 1302         }
 1303         prof_enabled = enabled;
 1304         mtx_unlock_spin(&sleepq_prof_lock);
 1305 }
 1306 
 1307 static int
 1308 enable_sleepq_prof(SYSCTL_HANDLER_ARGS)
 1309 {
 1310         int error, v;
 1311 
 1312         v = prof_enabled;
 1313         error = sysctl_handle_int(oidp, &v, v, req);
 1314         if (error)
 1315                 return (error);
 1316         if (req->newptr == NULL)
 1317                 return (error);
 1318         if (v == prof_enabled)
 1319                 return (0);
 1320         if (v == 1)
 1321                 sleepq_prof_reset();
 1322         mtx_lock_spin(&sleepq_prof_lock);
 1323         prof_enabled = !!v;
 1324         mtx_unlock_spin(&sleepq_prof_lock);
 1325 
 1326         return (0);
 1327 }
 1328 
 1329 static int
 1330 reset_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
 1331 {
 1332         int error, v;
 1333 
 1334         v = 0;
 1335         error = sysctl_handle_int(oidp, &v, 0, req);
 1336         if (error)
 1337                 return (error);
 1338         if (req->newptr == NULL)
 1339                 return (error);
 1340         if (v == 0)
 1341                 return (0);
 1342         sleepq_prof_reset();
 1343 
 1344         return (0);
 1345 }
 1346 
 1347 static int
 1348 dump_sleepq_prof_stats(SYSCTL_HANDLER_ARGS)
 1349 {
 1350         struct sleepq_prof *sp;
 1351         struct sbuf *sb;
 1352         int enabled;
 1353         int error;
 1354         int i;
 1355 
 1356         error = sysctl_wire_old_buffer(req, 0);
 1357         if (error != 0)
 1358                 return (error);
 1359         sb = sbuf_new_for_sysctl(NULL, NULL, SLEEPQ_SBUFSIZE, req);
 1360         sbuf_printf(sb, "\nwmesg\tcount\n");
 1361         enabled = prof_enabled;
 1362         mtx_lock_spin(&sleepq_prof_lock);
 1363         prof_enabled = 0;
 1364         mtx_unlock_spin(&sleepq_prof_lock);
 1365         for (i = 0; i < SC_TABLESIZE; i++) {
 1366                 LIST_FOREACH(sp, &sleepq_hash[i], sp_link) {
 1367                         sbuf_printf(sb, "%s\t%ld\n",
 1368                             sp->sp_wmesg, sp->sp_count);
 1369                 }
 1370         }
 1371         mtx_lock_spin(&sleepq_prof_lock);
 1372         prof_enabled = enabled;
 1373         mtx_unlock_spin(&sleepq_prof_lock);
 1374 
 1375         error = sbuf_finish(sb);
 1376         sbuf_delete(sb);
 1377         return (error);
 1378 }
 1379 
 1380 SYSCTL_PROC(_debug_sleepq, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
 1381     NULL, 0, dump_sleepq_prof_stats, "A", "Sleepqueue profiling statistics");
 1382 SYSCTL_PROC(_debug_sleepq, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW,
 1383     NULL, 0, reset_sleepq_prof_stats, "I",
 1384     "Reset sleepqueue profiling statistics");
 1385 SYSCTL_PROC(_debug_sleepq, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
 1386     NULL, 0, enable_sleepq_prof, "I", "Enable sleepqueue profiling");
 1387 #endif
 1388 
 1389 #ifdef DDB
 1390 DB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
 1391 {
 1392         struct sleepqueue_chain *sc;
 1393         struct sleepqueue *sq;
 1394 #ifdef INVARIANTS
 1395         struct lock_object *lock;
 1396 #endif
 1397         struct thread *td;
 1398         void *wchan;
 1399         int i;
 1400 
 1401         if (!have_addr)
 1402                 return;
 1403 
 1404         /*
 1405          * First, see if there is an active sleep queue for the wait channel
 1406          * indicated by the address.
 1407          */
 1408         wchan = (void *)addr;
 1409         sc = SC_LOOKUP(wchan);
 1410         LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
 1411                 if (sq->sq_wchan == wchan)
 1412                         goto found;
 1413 
 1414         /*
 1415          * Second, see if there is an active sleep queue at the address
 1416          * indicated.
 1417          */
 1418         for (i = 0; i < SC_TABLESIZE; i++)
 1419                 LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) {
 1420                         if (sq == (struct sleepqueue *)addr)
 1421                                 goto found;
 1422                 }
 1423 
 1424         db_printf("Unable to locate a sleep queue via %p\n", (void *)addr);
 1425         return;
 1426 found:
 1427         db_printf("Wait channel: %p\n", sq->sq_wchan);
 1428         db_printf("Queue type: %d\n", sq->sq_type);
 1429 #ifdef INVARIANTS
 1430         if (sq->sq_lock) {
 1431                 lock = sq->sq_lock;
 1432                 db_printf("Associated Interlock: %p - (%s) %s\n", lock,
 1433                     LOCK_CLASS(lock)->lc_name, lock->lo_name);
 1434         }
 1435 #endif
 1436         db_printf("Blocked threads:\n");
 1437         for (i = 0; i < NR_SLEEPQS; i++) {
 1438                 db_printf("\nQueue[%d]:\n", i);
 1439                 if (TAILQ_EMPTY(&sq->sq_blocked[i]))
 1440                         db_printf("\tempty\n");
 1441                 else
 1442                         TAILQ_FOREACH(td, &sq->sq_blocked[i],
 1443                                       td_slpq) {
 1444                                 db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td,
 1445                                           td->td_tid, td->td_proc->p_pid,
 1446                                           td->td_name);
 1447                         }
 1448                 db_printf("(expected: %u)\n", sq->sq_blockedcnt[i]);
 1449         }
 1450 }
 1451 
 1452 /* Alias 'show sleepqueue' to 'show sleepq'. */
 1453 DB_SHOW_ALIAS(sleepqueue, db_show_sleepqueue);
 1454 #endif

Cache object: 349f59111c14fe006cded7d2ca78a6c4


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.