The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_turnstile.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: kern_turnstile.c,v 1.45 2022/10/26 23:27:16 riastradh Exp $    */
    2 
    3 /*-
    4  * Copyright (c) 2002, 2006, 2007, 2009, 2019, 2020
    5  *     The NetBSD Foundation, Inc.
    6  * All rights reserved.
    7  *
    8  * This code is derived from software contributed to The NetBSD Foundation
    9  * by Jason R. Thorpe and Andrew Doran.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   30  * POSSIBILITY OF SUCH DAMAGE.
   31  */
   32 
   33 /*
   34  * Turnstiles are described in detail in:
   35  *
   36  *      Solaris Internals: Core Kernel Architecture, Jim Mauro and
   37  *          Richard McDougall.
   38  *
   39  * Turnstiles are kept in a hash table.  There are likely to be many more
   40  * synchronisation objects than there are threads.  Since a thread can block
   41  * on only one lock at a time, we only need one turnstile per thread, and
   42  * so they are allocated at thread creation time.
   43  *
   44  * When a thread decides it needs to block on a lock, it looks up the
   45  * active turnstile for that lock.  If no active turnstile exists, then
   46  * the process lends its turnstile to the lock.  If there is already an
   47  * active turnstile for the lock, the thread places its turnstile on a
   48  * list of free turnstiles, and references the active one instead.
   49  *
   50  * The act of looking up the turnstile acquires an interlock on the sleep
   51  * queue.  If a thread decides it doesn't need to block after all, then this
   52  * interlock must be released by explicitly aborting the turnstile
   53  * operation.
   54  *
   55  * When a thread is awakened, it needs to get its turnstile back.  If there
   56  * are still other threads waiting in the active turnstile, the thread
   57  * grabs a free turnstile off the free list.  Otherwise, it can take back
   58  * the active turnstile from the lock (thus deactivating the turnstile).
   59  *
   60  * Turnstiles are where we do priority inheritence.
   61  */
   62 
   63 #include <sys/cdefs.h>
   64 __KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.45 2022/10/26 23:27:16 riastradh Exp $");
   65 
   66 #include <sys/param.h>
   67 #include <sys/lockdebug.h>
   68 #include <sys/pool.h>
   69 #include <sys/proc.h>
   70 #include <sys/sleepq.h>
   71 #include <sys/sleeptab.h>
   72 #include <sys/systm.h>
   73 
   74 /*
   75  * Shift of 6 aligns to typical cache line size of 64 bytes;  there's no
   76  * point having two turnstile locks to back two lock objects that share one
   77  * cache line.
   78  */
   79 #define TS_HASH_SIZE    128
   80 #define TS_HASH_MASK    (TS_HASH_SIZE - 1)
   81 #define TS_HASH(obj)    (((uintptr_t)(obj) >> 6) & TS_HASH_MASK)
   82 
   83 static tschain_t        turnstile_chains[TS_HASH_SIZE] __cacheline_aligned;
   84 struct pool             turnstile_pool;
   85 
   86 static union {
   87         kmutex_t        lock;
   88         uint8_t         pad[COHERENCY_UNIT];
   89 } turnstile_locks[TS_HASH_SIZE] __cacheline_aligned;
   90 
   91 /*
   92  * turnstile_init:
   93  *
   94  *      Initialize the turnstile mechanism.
   95  */
   96 void
   97 turnstile_init(void)
   98 {
   99         int i;
  100 
  101         for (i = 0; i < TS_HASH_SIZE; i++) {
  102                 LIST_INIT(&turnstile_chains[i]);
  103                 mutex_init(&turnstile_locks[i].lock, MUTEX_DEFAULT, IPL_SCHED);
  104         }
  105 
  106         pool_init(&turnstile_pool, sizeof(turnstile_t), coherency_unit,
  107             0, 0, "tstile", NULL, IPL_NONE);
  108 
  109         turnstile_ctor(&turnstile0);
  110 }
  111 
  112 /*
  113  * turnstile_ctor:
  114  *
  115  *      Constructor for turnstiles.
  116  */
  117 void
  118 turnstile_ctor(turnstile_t *ts)
  119 {
  120 
  121         memset(ts, 0, sizeof(*ts));
  122         sleepq_init(&ts->ts_sleepq[TS_READER_Q]);
  123         sleepq_init(&ts->ts_sleepq[TS_WRITER_Q]);
  124 }
  125 
  126 /*
  127  * turnstile_remove:
  128  *
  129  *      Remove an LWP from a turnstile sleep queue and wake it.
  130  */
  131 static inline void
  132 turnstile_remove(turnstile_t *ts, lwp_t *l, int q)
  133 {
  134         turnstile_t *nts;
  135 
  136         KASSERT(l->l_ts == ts);
  137 
  138         /*
  139          * This process is no longer using the active turnstile.
  140          * Find an inactive one on the free list to give to it.
  141          */
  142         if ((nts = ts->ts_free) != NULL) {
  143                 KASSERT(TS_ALL_WAITERS(ts) > 1);
  144                 l->l_ts = nts;
  145                 ts->ts_free = nts->ts_free;
  146                 nts->ts_free = NULL;
  147         } else {
  148                 /*
  149                  * If the free list is empty, this is the last
  150                  * waiter.
  151                  */
  152                 KASSERT(TS_ALL_WAITERS(ts) == 1);
  153                 LIST_REMOVE(ts, ts_chain);
  154         }
  155 
  156         ts->ts_waiters[q]--;
  157         sleepq_remove(&ts->ts_sleepq[q], l);
  158 }
  159 
  160 /*
  161  * turnstile_lookup:
  162  *
  163  *      Look up the turnstile for the specified lock.  This acquires and
  164  *      holds the turnstile chain lock (sleep queue interlock).
  165  */
  166 turnstile_t *
  167 turnstile_lookup(wchan_t obj)
  168 {
  169         turnstile_t *ts;
  170         tschain_t *tc;
  171         u_int hash;
  172 
  173         hash = TS_HASH(obj);
  174         tc = &turnstile_chains[hash];
  175         mutex_spin_enter(&turnstile_locks[hash].lock);
  176 
  177         LIST_FOREACH(ts, tc, ts_chain)
  178                 if (ts->ts_obj == obj)
  179                         return (ts);
  180 
  181         /*
  182          * No turnstile yet for this lock.  No problem, turnstile_block()
  183          * handles this by fetching the turnstile from the blocking thread.
  184          */
  185         return (NULL);
  186 }
  187 
  188 /*
  189  * turnstile_exit:
  190  *
  191  *      Abort a turnstile operation.
  192  */
  193 void
  194 turnstile_exit(wchan_t obj)
  195 {
  196 
  197         mutex_spin_exit(&turnstile_locks[TS_HASH(obj)].lock);
  198 }
  199 
  200 /*
  201  * turnstile_lendpri:
  202  *
  203  *      Lend our priority to lwps on the blocking chain.
  204  *
  205  *      If the current owner of the lock (l->l_wchan, set by sleepq_enqueue)
  206  *      has a priority lower than ours (lwp_eprio(l)), lend our priority to
  207  *      him to avoid priority inversions.
  208  */
  209 
  210 static void
  211 turnstile_lendpri(lwp_t *cur)
  212 {
  213         lwp_t * l = cur;
  214         pri_t prio;
  215 
  216         /*
  217          * NOTE: if you get a panic in this code block, it is likely that
  218          * a lock has been destroyed or corrupted while still in use.  Try
  219          * compiling a kernel with LOCKDEBUG to pinpoint the problem.
  220          */
  221 
  222         LOCKDEBUG_BARRIER(l->l_mutex, 1);
  223         KASSERT(l == curlwp);
  224         prio = lwp_eprio(l);
  225         for (;;) {
  226                 lwp_t *owner;
  227                 turnstile_t *ts;
  228                 bool dolock;
  229 
  230                 if (l->l_wchan == NULL)
  231                         break;
  232 
  233                 /*
  234                  * Ask syncobj the owner of the lock.
  235                  */
  236                 owner = (*l->l_syncobj->sobj_owner)(l->l_wchan);
  237                 if (owner == NULL)
  238                         break;
  239 
  240                 /*
  241                  * The owner may have changed as we have dropped the tc lock.
  242                  */
  243                 if (cur == owner) {
  244                         /*
  245                          * We own the lock: stop here, sleepq_block()
  246                          * should wake up immediately.
  247                          */
  248                         break;
  249                 }
  250                 /*
  251                  * Acquire owner->l_mutex if we don't have it yet.
  252                  * Because we already have another LWP lock (l->l_mutex) held,
  253                  * we need to play a try lock dance to avoid deadlock.
  254                  */
  255                 dolock = l->l_mutex != atomic_load_relaxed(&owner->l_mutex);
  256                 if (l == owner || (dolock && !lwp_trylock(owner))) {
  257                         /*
  258                          * The owner was changed behind us or trylock failed.
  259                          * Restart from curlwp.
  260                          *
  261                          * Note that there may be a livelock here:
  262                          * the owner may try grabbing cur's lock (which is the
  263                          * tc lock) while we're trying to grab the owner's lock.
  264                          */
  265                         lwp_unlock(l);
  266                         l = cur;
  267                         lwp_lock(l);
  268                         prio = lwp_eprio(l);
  269                         continue;
  270                 }
  271                 /*
  272                  * If the owner's priority is already higher than ours,
  273                  * there's nothing to do anymore.
  274                  */
  275                 if (prio <= lwp_eprio(owner)) {
  276                         if (dolock)
  277                                 lwp_unlock(owner);
  278                         break;
  279                 }
  280                 /*
  281                  * Lend our priority to the 'owner' LWP.
  282                  *
  283                  * Update lenders info for turnstile_unlendpri.
  284                  */
  285                 ts = l->l_ts;
  286                 KASSERT(ts->ts_inheritor == owner || ts->ts_inheritor == NULL);
  287                 if (ts->ts_inheritor == NULL) {
  288                         ts->ts_inheritor = owner;
  289                         ts->ts_eprio = prio;
  290                         SLIST_INSERT_HEAD(&owner->l_pi_lenders, ts, ts_pichain);
  291                         lwp_lendpri(owner, prio);
  292                 } else if (prio > ts->ts_eprio) {
  293                         ts->ts_eprio = prio;
  294                         lwp_lendpri(owner, prio);
  295                 }
  296                 if (dolock)
  297                         lwp_unlock(l);
  298                 LOCKDEBUG_BARRIER(owner->l_mutex, 1);
  299                 l = owner;
  300         }
  301         LOCKDEBUG_BARRIER(l->l_mutex, 1);
  302         if (cur->l_mutex != atomic_load_relaxed(&l->l_mutex)) {
  303                 lwp_unlock(l);
  304                 lwp_lock(cur);
  305         }
  306         LOCKDEBUG_BARRIER(cur->l_mutex, 1);
  307 }
  308 
  309 /*
  310  * turnstile_unlendpri: undo turnstile_lendpri
  311  */
  312 
  313 static void
  314 turnstile_unlendpri(turnstile_t *ts)
  315 {
  316         lwp_t * const l = curlwp;
  317         turnstile_t *iter;
  318         turnstile_t *next;
  319         turnstile_t *prev = NULL;
  320         pri_t prio;
  321         bool dolock;
  322 
  323         KASSERT(ts->ts_inheritor != NULL);
  324         ts->ts_inheritor = NULL;
  325         dolock = (atomic_load_relaxed(&l->l_mutex) ==
  326             l->l_cpu->ci_schedstate.spc_lwplock);
  327         if (dolock) {
  328                 lwp_lock(l);
  329         }
  330 
  331         /*
  332          * the following loop does two things.
  333          *
  334          * - remove ts from the list.
  335          *
  336          * - from the rest of the list, find the highest priority.
  337          */
  338 
  339         prio = -1;
  340         KASSERT(!SLIST_EMPTY(&l->l_pi_lenders));
  341         for (iter = SLIST_FIRST(&l->l_pi_lenders);
  342             iter != NULL; iter = next) {
  343                 KASSERT(lwp_eprio(l) >= ts->ts_eprio);
  344                 next = SLIST_NEXT(iter, ts_pichain);
  345                 if (iter == ts) {
  346                         if (prev == NULL) {
  347                                 SLIST_REMOVE_HEAD(&l->l_pi_lenders,
  348                                     ts_pichain);
  349                         } else {
  350                                 SLIST_REMOVE_AFTER(prev, ts_pichain);
  351                         }
  352                 } else if (prio < iter->ts_eprio) {
  353                         prio = iter->ts_eprio;
  354                 }
  355                 prev = iter;
  356         }
  357 
  358         lwp_lendpri(l, prio);
  359 
  360         if (dolock) {
  361                 lwp_unlock(l);
  362         }
  363 }
  364 
  365 /*
  366  * turnstile_block:
  367  *
  368  *       Enter an object into the turnstile chain and prepare the current
  369  *       LWP for sleep.
  370  */
  371 void
  372 turnstile_block(turnstile_t *ts, int q, wchan_t obj, syncobj_t *sobj)
  373 {
  374         lwp_t * const l = curlwp; /* cached curlwp */
  375         turnstile_t *ots;
  376         tschain_t *tc;
  377         kmutex_t *lock;
  378         sleepq_t *sq;
  379         pri_t obase;
  380         u_int hash;
  381 
  382         hash = TS_HASH(obj);
  383         tc = &turnstile_chains[hash];
  384         lock = &turnstile_locks[hash].lock;
  385 
  386         KASSERT(q == TS_READER_Q || q == TS_WRITER_Q);
  387         KASSERT(mutex_owned(lock));
  388         KASSERT(l != NULL && l->l_ts != NULL);
  389 
  390         if (ts == NULL) {
  391                 /*
  392                  * We are the first thread to wait for this object;
  393                  * lend our turnstile to it.
  394                  */
  395                 ts = l->l_ts;
  396                 KASSERT(TS_ALL_WAITERS(ts) == 0);
  397                 KASSERT(LIST_EMPTY(&ts->ts_sleepq[TS_READER_Q]) &&
  398                         LIST_EMPTY(&ts->ts_sleepq[TS_WRITER_Q]));
  399                 ts->ts_obj = obj;
  400                 ts->ts_inheritor = NULL;
  401                 LIST_INSERT_HEAD(tc, ts, ts_chain);
  402         } else {
  403                 /*
  404                  * Object already has a turnstile.  Put our turnstile
  405                  * onto the free list, and reference the existing
  406                  * turnstile instead.
  407                  */
  408                 ots = l->l_ts;
  409                 KASSERT(ots->ts_free == NULL);
  410                 ots->ts_free = ts->ts_free;
  411                 ts->ts_free = ots;
  412                 l->l_ts = ts;
  413 
  414                 KASSERT(ts->ts_obj == obj);
  415                 KASSERT(TS_ALL_WAITERS(ts) != 0);
  416                 KASSERT(!LIST_EMPTY(&ts->ts_sleepq[TS_READER_Q]) ||
  417                         !LIST_EMPTY(&ts->ts_sleepq[TS_WRITER_Q]));
  418         }
  419 
  420         sq = &ts->ts_sleepq[q];
  421         ts->ts_waiters[q]++;
  422         sleepq_enter(sq, l, lock);
  423         LOCKDEBUG_BARRIER(lock, 1);
  424         l->l_kpriority = true;
  425         obase = l->l_kpribase;
  426         if (obase < PRI_KTHREAD)
  427                 l->l_kpribase = PRI_KTHREAD;
  428         sleepq_enqueue(sq, obj, "tstile", sobj, false);
  429 
  430         /*
  431          * Disable preemption across this entire block, as we may drop
  432          * scheduler locks (allowing preemption), and would prefer not
  433          * to be interrupted while in a state of flux.
  434          */
  435         KPREEMPT_DISABLE(l);
  436         KASSERT(lock == l->l_mutex);
  437         turnstile_lendpri(l);
  438         sleepq_block(0, false, sobj);
  439         l->l_kpribase = obase;
  440         KPREEMPT_ENABLE(l);
  441 }
  442 
  443 /*
  444  * turnstile_wakeup:
  445  *
  446  *      Wake up the specified number of threads that are blocked
  447  *      in a turnstile.
  448  */
  449 void
  450 turnstile_wakeup(turnstile_t *ts, int q, int count, lwp_t *nl)
  451 {
  452         sleepq_t *sq;
  453         kmutex_t *lock;
  454         u_int hash;
  455         lwp_t *l;
  456 
  457         hash = TS_HASH(ts->ts_obj);
  458         lock = &turnstile_locks[hash].lock;
  459         sq = &ts->ts_sleepq[q];
  460 
  461         KASSERT(q == TS_READER_Q || q == TS_WRITER_Q);
  462         KASSERT(count > 0 && count <= TS_WAITERS(ts, q));
  463         KASSERT(mutex_owned(lock));
  464         KASSERT(ts->ts_inheritor == curlwp || ts->ts_inheritor == NULL);
  465 
  466         /*
  467          * restore inherited priority if necessary.
  468          */
  469 
  470         if (ts->ts_inheritor != NULL) {
  471                 turnstile_unlendpri(ts);
  472         }
  473 
  474         if (nl != NULL) {
  475 #if defined(DEBUG) || defined(LOCKDEBUG)
  476                 LIST_FOREACH(l, sq, l_sleepchain) {
  477                         if (l == nl)
  478                                 break;
  479                 }
  480                 if (l == NULL)
  481                         panic("turnstile_wakeup: nl not on sleepq");
  482 #endif
  483                 turnstile_remove(ts, nl, q);
  484         } else {
  485                 while (count-- > 0) {
  486                         l = LIST_FIRST(sq);
  487                         KASSERT(l != NULL);
  488                         turnstile_remove(ts, l, q);
  489                 }
  490         }
  491         mutex_spin_exit(lock);
  492 }
  493 
  494 /*
  495  * turnstile_unsleep:
  496  *
  497  *      Remove an LWP from the turnstile.  This is called when the LWP has
  498  *      not been awoken normally but instead interrupted: for example, if it
  499  *      has received a signal.  It's not a valid action for turnstiles,
  500  *      since LWPs blocking on a turnstile are not interruptable.
  501  */
  502 void
  503 turnstile_unsleep(lwp_t *l, bool cleanup)
  504 {
  505 
  506         lwp_unlock(l);
  507         panic("turnstile_unsleep");
  508 }
  509 
  510 /*
  511  * turnstile_changepri:
  512  *
  513  *      Adjust the priority of an LWP residing on a turnstile.
  514  */
  515 void
  516 turnstile_changepri(lwp_t *l, pri_t pri)
  517 {
  518 
  519         /* XXX priority inheritance */
  520         sleepq_changepri(l, pri);
  521 }
  522 
  523 #if defined(LOCKDEBUG)
  524 /*
  525  * turnstile_print:
  526  *
  527  *      Given the address of a lock object, print the contents of a
  528  *      turnstile.
  529  */
  530 void
  531 turnstile_print(volatile void *obj, void (*pr)(const char *, ...))
  532 {
  533         turnstile_t *ts;
  534         tschain_t *tc;
  535         sleepq_t *rsq, *wsq;
  536         u_int hash;
  537         lwp_t *l;
  538 
  539         hash = TS_HASH(obj);
  540         tc = &turnstile_chains[hash];
  541 
  542         LIST_FOREACH(ts, tc, ts_chain)
  543                 if (ts->ts_obj == obj)
  544                         break;
  545 
  546         if (ts == NULL) {
  547                 (*pr)("Turnstile: no active turnstile for this lock.\n");
  548                 return;
  549         }
  550 
  551         rsq = &ts->ts_sleepq[TS_READER_Q];
  552         wsq = &ts->ts_sleepq[TS_WRITER_Q];
  553 
  554         (*pr)("Turnstile:\n");
  555         (*pr)("=> %d waiting readers:", TS_WAITERS(ts, TS_READER_Q));
  556         LIST_FOREACH(l, rsq, l_sleepchain) {
  557                 (*pr)(" %p", l);
  558         }
  559         (*pr)("\n");
  560 
  561         (*pr)("=> %d waiting writers:", TS_WAITERS(ts, TS_WRITER_Q));
  562         LIST_FOREACH(l, wsq, l_sleepchain) {
  563                 (*pr)(" %p", l);
  564         }
  565         (*pr)("\n");
  566 }
  567 #endif  /* LOCKDEBUG */

Cache object: 127012c419dc7c5729ce7d7c0f564910


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.