
FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_turnstile.c


    1 /*      $NetBSD: kern_turnstile.c,v 1.23.4.1 2009/09/28 01:38:24 snj Exp $      */
    2 
    3 /*-
    4  * Copyright (c) 2002, 2006, 2007 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Jason R. Thorpe and Andrew Doran.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   29  * POSSIBILITY OF SUCH DAMAGE.
   30  */
   31 
   32 /*
   33  * Turnstiles are described in detail in:
   34  *
   35  *      Solaris Internals: Core Kernel Architecture, Jim Mauro and
   36  *          Richard McDougall.
   37  *
   38  * Turnstiles are kept in a hash table.  There are likely to be many more
   39  * synchronisation objects than there are threads.  Since a thread can block
   40  * on only one lock at a time, we only need one turnstile per thread, and
   41  * so they are allocated at thread creation time.
   42  *
   43  * When a thread decides it needs to block on a lock, it looks up the
   44  * active turnstile for that lock.  If no active turnstile exists, then
    45  * the thread lends its turnstile to the lock.  If there is already an
   46  * active turnstile for the lock, the thread places its turnstile on a
   47  * list of free turnstiles, and references the active one instead.
   48  *
   49  * The act of looking up the turnstile acquires an interlock on the sleep
   50  * queue.  If a thread decides it doesn't need to block after all, then this
   51  * interlock must be released by explicitly aborting the turnstile
   52  * operation.
   53  *
   54  * When a thread is awakened, it needs to get its turnstile back.  If there
   55  * are still other threads waiting in the active turnstile, the thread
   56  * grabs a free turnstile off the free list.  Otherwise, it can take back
   57  * the active turnstile from the lock (thus deactivating the turnstile).
   58  *
    59  * Turnstiles are the place to do priority inheritance.
   60  */
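/*
 * A minimal usage sketch (not part of this file): how a lock's
 * contended-acquire path might drive the turnstile interfaces defined
 * below.  The names example_mutex_t, example_mutex_try_acquire(),
 * example_mutex_set_waiters() and example_syncobj are hypothetical;
 * real callers such as kern_mutex.c and kern_rwlock.c differ in detail.
 */
#if 0
static void
example_mutex_enter_slow(example_mutex_t *mtx)
{
        turnstile_t *ts;

        for (;;) {
                /* Lock the turnstile chain for this wait object. */
                ts = turnstile_lookup(mtx);

                /*
                 * Re-check under the chain lock: the holder may have
                 * released the lock while we were looking it up.
                 */
                if (example_mutex_try_acquire(mtx)) {
                        /*
                         * Not blocking after all: abort, dropping the
                         * chain lock taken by turnstile_lookup().
                         */
                        turnstile_exit(mtx);
                        return;
                }

                /*
                 * Record that there are waiters, then sleep.  A NULL
                 * ts means we are the first waiter and will lend our
                 * own turnstile to the lock.  turnstile_block() drops
                 * the chain lock before returning.
                 */
                example_mutex_set_waiters(mtx);
                turnstile_block(ts, TS_WRITER_Q, mtx, &example_syncobj);
        }
}
#endif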
   61 
   62 #include <sys/cdefs.h>
   63 __KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.23.4.1 2009/09/28 01:38:24 snj Exp $");
   64 
   65 #include <sys/param.h>
   66 #include <sys/lockdebug.h>
   67 #include <sys/pool.h>
    68 #include <sys/proc.h>
   69 #include <sys/sleepq.h>
   70 #include <sys/systm.h>
   71 
   72 #include <uvm/uvm_extern.h>
   73 
   74 #define TS_HASH_SIZE    64
   75 #define TS_HASH_MASK    (TS_HASH_SIZE - 1)
   76 #define TS_HASH(obj)    (((uintptr_t)(obj) >> 3) & TS_HASH_MASK)
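/*
 * Worked example (hypothetical address): for a lock object at
 * 0xffffffff80a41238, the low three bits are discarded (the object is
 * at least 8-byte aligned, so they carry no information) and the next
 * six bits select the bucket: (0x1238 >> 3) & 0x3f == 0x247 & 0x3f == 7,
 * i.e. turnstile_tab[7].
 */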
   77 
   78 tschain_t       turnstile_tab[TS_HASH_SIZE];
   79 pool_cache_t    turnstile_cache;
   80 
   81 int     turnstile_ctor(void *, void *, int);
   82 
   83 extern turnstile_t turnstile0;
   84 
   85 /*
   86  * turnstile_init:
   87  *
   88  *      Initialize the turnstile mechanism.
   89  */
   90 void
   91 turnstile_init(void)
   92 {
   93         tschain_t *tc;
   94         int i;
   95 
   96         for (i = 0; i < TS_HASH_SIZE; i++) {
   97                 tc = &turnstile_tab[i];
   98                 LIST_INIT(&tc->tc_chain);
   99                 mutex_init(&tc->tc_mutex, MUTEX_DEFAULT, IPL_SCHED);
  100         }
  101 
  102         turnstile_cache = pool_cache_init(sizeof(turnstile_t), 0, 0, 0,
  103             "tstilepl", NULL, IPL_NONE, turnstile_ctor, NULL, NULL);
  104         KASSERT(turnstile_cache != NULL);
  105 
  106         (void)turnstile_ctor(NULL, &turnstile0, 0);
  107 }
  108 
  109 /*
  110  * turnstile_ctor:
  111  *
  112  *      Constructor for turnstiles.
  113  */
  114 int
  115 turnstile_ctor(void *arg, void *obj, int flags)
  116 {
  117         turnstile_t *ts = obj;
  118 
  119         memset(ts, 0, sizeof(*ts));
  120         sleepq_init(&ts->ts_sleepq[TS_READER_Q]);
  121         sleepq_init(&ts->ts_sleepq[TS_WRITER_Q]);
  122         return (0);
  123 }
  124 
  125 /*
  126  * turnstile_remove:
  127  *
  128  *      Remove an LWP from a turnstile sleep queue and wake it.
  129  */
  130 static inline void
  131 turnstile_remove(turnstile_t *ts, lwp_t *l, int q)
  132 {
  133         turnstile_t *nts;
  134 
  135         KASSERT(l->l_ts == ts);
  136 
  137         /*
   138          * This LWP is no longer using the active turnstile.
  139          * Find an inactive one on the free list to give to it.
  140          */
  141         if ((nts = ts->ts_free) != NULL) {
  142                 KASSERT(TS_ALL_WAITERS(ts) > 1);
  143                 l->l_ts = nts;
  144                 ts->ts_free = nts->ts_free;
  145                 nts->ts_free = NULL;
  146         } else {
  147                 /*
  148                  * If the free list is empty, this is the last
  149                  * waiter.
  150                  */
  151                 KASSERT(TS_ALL_WAITERS(ts) == 1);
  152                 LIST_REMOVE(ts, ts_chain);
  153         }
  154 
  155         ts->ts_waiters[q]--;
  156         (void)sleepq_remove(&ts->ts_sleepq[q], l);
  157 }
  158 
  159 /*
  160  * turnstile_lookup:
  161  *
  162  *      Look up the turnstile for the specified lock.  This acquires and
  163  *      holds the turnstile chain lock (sleep queue interlock).
  164  */
  165 turnstile_t *
  166 turnstile_lookup(wchan_t obj)
  167 {
  168         turnstile_t *ts;
  169         tschain_t *tc;
  170 
  171         tc = &turnstile_tab[TS_HASH(obj)];
  172         mutex_spin_enter(&tc->tc_mutex);
  173 
  174         LIST_FOREACH(ts, &tc->tc_chain, ts_chain)
  175                 if (ts->ts_obj == obj)
  176                         return (ts);
  177 
  178         /*
  179          * No turnstile yet for this lock.  No problem, turnstile_block()
  180          * handles this by fetching the turnstile from the blocking thread.
  181          */
  182         return (NULL);
  183 }
  184 
  185 /*
  186  * turnstile_exit:
  187  *
  188  *      Abort a turnstile operation.
  189  */
  190 void
  191 turnstile_exit(wchan_t obj)
  192 {
  193         tschain_t *tc;
  194 
  195         tc = &turnstile_tab[TS_HASH(obj)];
  196         mutex_spin_exit(&tc->tc_mutex);
  197 }
  198 
  199 /*
  200  * turnstile_block:
  201  *
   202  *      Enter an object into the turnstile chain and prepare the current
   203  *      LWP for sleep.
  204  */
  205 void
  206 turnstile_block(turnstile_t *ts, int q, wchan_t obj, syncobj_t *sobj)
  207 {
  208         lwp_t *l;
  209         lwp_t *cur; /* cached curlwp */
  210         lwp_t *owner;
  211         turnstile_t *ots;
  212         tschain_t *tc;
  213         sleepq_t *sq;
  214         pri_t prio, obase;
  215 
  216         tc = &turnstile_tab[TS_HASH(obj)];
  217         l = cur = curlwp;
  218 
  219         KASSERT(q == TS_READER_Q || q == TS_WRITER_Q);
  220         KASSERT(mutex_owned(&tc->tc_mutex));
  221         KASSERT(l != NULL && l->l_ts != NULL);
  222 
  223         if (ts == NULL) {
  224                 /*
  225                  * We are the first thread to wait for this object;
  226                  * lend our turnstile to it.
  227                  */
  228                 ts = l->l_ts;
  229                 KASSERT(TS_ALL_WAITERS(ts) == 0);
  230                 KASSERT(TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q]) &&
  231                         TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q]));
  232                 ts->ts_obj = obj;
  233                 ts->ts_inheritor = NULL;
  234                 LIST_INSERT_HEAD(&tc->tc_chain, ts, ts_chain);
  235         } else {
  236                 /*
  237                  * Object already has a turnstile.  Put our turnstile
  238                  * onto the free list, and reference the existing
  239                  * turnstile instead.
  240                  */
  241                 ots = l->l_ts;
  242                 KASSERT(ots->ts_free == NULL);
  243                 ots->ts_free = ts->ts_free;
  244                 ts->ts_free = ots;
  245                 l->l_ts = ts;
  246 
  247                 KASSERT(ts->ts_obj == obj);
  248                 KASSERT(TS_ALL_WAITERS(ts) != 0);
  249                 KASSERT(!TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q]) ||
  250                         !TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q]));
  251         }
  252 
  253         sq = &ts->ts_sleepq[q];
  254         ts->ts_waiters[q]++;
  255         sleepq_enter(sq, l, &tc->tc_mutex);
  256         /* now tc->tc_mutex is also cur->l_mutex and l->l_mutex */
  257         LOCKDEBUG_BARRIER(&tc->tc_mutex, 1);
  258         l->l_kpriority = true;
  259         obase = l->l_kpribase;
  260         if (obase < PRI_KTHREAD)
  261                 l->l_kpribase = PRI_KTHREAD;
  262         sleepq_enqueue(sq, obj, "tstile", sobj);
  263 
  264         /*
  265          * Disable preemption across this entire block, as we may drop
  266          * scheduler locks (allowing preemption), and would prefer not
  267          * to be interrupted while in a state of flux.
  268          */
  269         KPREEMPT_DISABLE(l);
  270 
  271         /*
  272          * Lend our priority to lwps on the blocking chain.
  273          *
  274          * NOTE: if you get a panic in this code block, it is likely that
  275          * a lock has been destroyed or corrupted while still in use.  Try
  276          * compiling a kernel with LOCKDEBUG to pinpoint the problem.
  277          */
  278         prio = lwp_eprio(l);
  279 
  280         for (;;) {
  281                 bool dolock;
  282 
  283                 if (l->l_wchan == NULL)
  284                         break;
  285 
  286                 owner = (*l->l_syncobj->sobj_owner)(l->l_wchan);
  287                 if (owner == NULL)
  288                         break;
  289 
  290                 /* The owner may have changed as we have dropped the tc lock */
  291                 if (cur == owner) {
  292                         /*
  293                          * we own the lock: stop here, sleepq_block()
   294                          * should wake up immediately
  295                          */
  296                         break;
  297                 }
  298 
  299                 if (l == owner) {
  300                         /* owner has changed, restart from curlwp */
  301                         lwp_unlock(l);
  302                         l = cur;
  303                         lwp_lock(l);
  304                         prio = lwp_eprio(l);
  305                         continue;
  306                 }
   307
  308                 if (l->l_mutex != owner->l_mutex)
  309                         dolock = true;
  310                 else
  311                         dolock = false;
  312                 if (dolock && !lwp_trylock(owner)) {
  313                         /*
  314                          * restart from curlwp.
  315                          * Note that there may be a livelock here:
   316                          * the owner may try grabbing cur's lock (which is
  317                          * the tc lock) while we're trying to grab
  318                          * the owner's lock.
  319                          */
  320                         lwp_unlock(l);
  321                         l = cur;
  322                         lwp_lock(l);
  323                         prio = lwp_eprio(l);
  324                         continue;
  325                 }
  326                 if (prio <= lwp_eprio(owner)) {
  327                         if (dolock)
  328                                 lwp_unlock(owner);
  329                         break;
  330                 }
  331                 ts = l->l_ts;
  332                 KASSERT(ts->ts_inheritor == owner || ts->ts_inheritor == NULL);
  333                 if (ts->ts_inheritor == NULL) {
  334                         ts->ts_inheritor = owner;
  335                         ts->ts_eprio = prio;
  336                         SLIST_INSERT_HEAD(&owner->l_pi_lenders, ts, ts_pichain);
  337                         lwp_lendpri(owner, prio);
  338                 } else if (prio > ts->ts_eprio) {
  339                         ts->ts_eprio = prio;
  340                         lwp_lendpri(owner, prio);
  341                 }
  342                 if (dolock)
  343                         lwp_unlock(l);
  344                 l = owner;
  345         }
  346         LOCKDEBUG_BARRIER(l->l_mutex, 1);
  347         if (cur->l_mutex != l->l_mutex) {
  348                 lwp_unlock(l);
  349                 lwp_lock(cur);
  350         }
  351         LOCKDEBUG_BARRIER(cur->l_mutex, 1);
  352 
  353         sleepq_block(0, false);
  354         cur->l_kpribase = obase;
  355         KPREEMPT_ENABLE(cur);
  356 }
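/*
 * A concrete (hypothetical) walk of the lending loop above: LWP A at
 * effective priority 50 blocks on mutex M1, which is owned by LWP B at
 * priority 30, and B is itself asleep on mutex M2 owned by LWP C at
 * priority 10.  The first iteration records priority 50 in M1's
 * turnstile and lends it to B; the walk then follows B's wait channel
 * to M2 and lends 50 to C as well.  C is not blocked (l_wchan == NULL),
 * so the loop terminates and A goes to sleep.  The lent priorities are
 * given back in turnstile_wakeup() as C and then B release their locks.
 */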
  357 
  358 /*
  359  * turnstile_wakeup:
  360  *
  361  *      Wake up the specified number of threads that are blocked
  362  *      in a turnstile.
  363  */
  364 void
  365 turnstile_wakeup(turnstile_t *ts, int q, int count, lwp_t *nl)
  366 {
  367         sleepq_t *sq;
  368         tschain_t *tc;
  369         lwp_t *l;
  370 
  371         tc = &turnstile_tab[TS_HASH(ts->ts_obj)];
  372         sq = &ts->ts_sleepq[q];
  373 
  374         KASSERT(q == TS_READER_Q || q == TS_WRITER_Q);
  375         KASSERT(count > 0 && count <= TS_WAITERS(ts, q));
  376         KASSERT(mutex_owned(&tc->tc_mutex));
  377         KASSERT(ts->ts_inheritor == curlwp || ts->ts_inheritor == NULL);
  378 
  379         /*
  380          * restore inherited priority if necessary.
  381          */
  382 
  383         if (ts->ts_inheritor != NULL) {
  384                 turnstile_t *iter;
  385                 turnstile_t *next;
  386                 turnstile_t *prev = NULL;
  387                 pri_t prio;
  388                 bool dolock;
  389 
  390                 ts->ts_inheritor = NULL;
  391                 l = curlwp;
  392 
  393                 dolock = l->l_mutex == l->l_cpu->ci_schedstate.spc_lwplock;
  394                 if (dolock) {
  395                         lwp_lock(l);
  396                 }
  397 
  398                 /*
  399                  * the following loop does two things.
  400                  *
  401                  * - remove ts from the list.
  402                  *
  403                  * - from the rest of the list, find the highest priority.
  404                  */
  405 
  406                 prio = -1;
  407                 KASSERT(!SLIST_EMPTY(&l->l_pi_lenders));
  408                 for (iter = SLIST_FIRST(&l->l_pi_lenders);
  409                     iter != NULL; iter = next) {
  410                         KASSERT(lwp_eprio(l) >= ts->ts_eprio);
  411                         next = SLIST_NEXT(iter, ts_pichain);
  412                         if (iter == ts) {
  413                                 if (prev == NULL) {
  414                                         SLIST_REMOVE_HEAD(&l->l_pi_lenders,
  415                                             ts_pichain);
  416                                 } else {
  417                                         SLIST_REMOVE_AFTER(prev, ts_pichain);
  418                                 }
  419                         } else if (prio < iter->ts_eprio) {
  420                                 prio = iter->ts_eprio;
  421                         }
  422                         prev = iter;
  423                 }
  424 
  425                 lwp_lendpri(l, prio);
  426 
  427                 if (dolock) {
  428                         lwp_unlock(l);
  429                 }
  430         }
  431 
  432         if (nl != NULL) {
  433 #if defined(DEBUG) || defined(LOCKDEBUG)
  434                 TAILQ_FOREACH(l, sq, l_sleepchain) {
  435                         if (l == nl)
  436                                 break;
  437                 }
  438                 if (l == NULL)
  439                         panic("turnstile_wakeup: nl not on sleepq");
  440 #endif
  441                 turnstile_remove(ts, nl, q);
  442         } else {
  443                 while (count-- > 0) {
  444                         l = TAILQ_FIRST(sq);
  445                         KASSERT(l != NULL);
  446                         turnstile_remove(ts, l, q);
  447                 }
  448         }
  449         mutex_spin_exit(&tc->tc_mutex);
  450 }
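/*
 * A minimal usage sketch (not part of this file): how a lock's
 * contended-release path might hand the turnstile back.  The names
 * example_mutex_t and example_mutex_release() are hypothetical; see
 * kern_mutex.c or kern_rwlock.c for real callers.
 */
#if 0
static void
example_mutex_exit_slow(example_mutex_t *mtx)
{
        turnstile_t *ts;

        /* Lock the chain and find the turnstile lent to this lock. */
        ts = turnstile_lookup(mtx);
        example_mutex_release(mtx);

        if (ts == NULL) {
                /* No waiters after all; just drop the chain lock. */
                turnstile_exit(mtx);
        } else {
                /*
                 * Wake every waiting writer.  This also restores any
                 * priority that was lent to us and drops the chain
                 * lock.
                 */
                turnstile_wakeup(ts, TS_WRITER_Q,
                    TS_WAITERS(ts, TS_WRITER_Q), NULL);
        }
}
#endif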
  451 
  452 /*
  453  * turnstile_unsleep:
  454  *
  455  *      Remove an LWP from the turnstile.  This is called when the LWP has
  456  *      not been awoken normally but instead interrupted: for example, if it
  457  *      has received a signal.  It's not a valid action for turnstiles,
   458  * since LWPs blocking on a turnstile are not interruptible.
  459  */
  460 u_int
  461 turnstile_unsleep(lwp_t *l, bool cleanup)
  462 {
  463 
  464         lwp_unlock(l);
  465         panic("turnstile_unsleep");
  466 }
  467 
  468 /*
  469  * turnstile_changepri:
  470  *
  471  *      Adjust the priority of an LWP residing on a turnstile.
  472  */
  473 void
  474 turnstile_changepri(lwp_t *l, pri_t pri)
  475 {
  476 
  477         /* XXX priority inheritance */
  478         sleepq_changepri(l, pri);
  479 }
  480 
  481 #if defined(LOCKDEBUG)
  482 /*
  483  * turnstile_print:
  484  *
  485  *      Given the address of a lock object, print the contents of a
  486  *      turnstile.
  487  */
  488 void
  489 turnstile_print(volatile void *obj, void (*pr)(const char *, ...))
  490 {
  491         turnstile_t *ts;
  492         tschain_t *tc;
  493         sleepq_t *rsq, *wsq;
  494         lwp_t *l;
  495 
  496         tc = &turnstile_tab[TS_HASH(obj)];
  497 
  498         LIST_FOREACH(ts, &tc->tc_chain, ts_chain)
  499                 if (ts->ts_obj == obj)
  500                         break;
  501 
  502         (*pr)("Turnstile chain at %p.\n", tc);
  503         if (ts == NULL) {
  504                 (*pr)("=> No active turnstile for this lock.\n");
  505                 return;
  506         }
  507 
  508         rsq = &ts->ts_sleepq[TS_READER_Q];
  509         wsq = &ts->ts_sleepq[TS_WRITER_Q];
  510 
  511         (*pr)("=> Turnstile at %p (wrq=%p, rdq=%p).\n", ts, rsq, wsq);
  512 
  513         (*pr)("=> %d waiting readers:", TS_WAITERS(ts, TS_READER_Q));
  514         TAILQ_FOREACH(l, rsq, l_sleepchain) {
  515                 (*pr)(" %p", l);
  516         }
  517         (*pr)("\n");
  518 
  519         (*pr)("=> %d waiting writers:", TS_WAITERS(ts, TS_WRITER_Q));
  520         TAILQ_FOREACH(l, wsq, l_sleepchain) {
  521                 (*pr)(" %p", l);
  522         }
  523         (*pr)("\n");
  524 }
  525 #endif  /* LOCKDEBUG */
