FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_rwlock.c


    1 /*-
    2  * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 
   27 /*
   28  * Machine independent bits of reader/writer lock implementation.
   29  */
   30 
   31 #include <sys/cdefs.h>
   32 __FBSDID("$FreeBSD: releng/11.2/sys/kern/kern_rwlock.c 334437 2018-05-31 15:58:28Z mjg $");
   33 
   34 #include "opt_ddb.h"
   35 #include "opt_hwpmc_hooks.h"
   36 #include "opt_no_adaptive_rwlocks.h"
   37 
   38 #include <sys/param.h>
   39 #include <sys/kdb.h>
   40 #include <sys/ktr.h>
   41 #include <sys/kernel.h>
   42 #include <sys/lock.h>
   43 #include <sys/mutex.h>
   44 #include <sys/proc.h>
   45 #include <sys/rwlock.h>
   46 #include <sys/sched.h>
   47 #include <sys/smp.h>
   48 #include <sys/sysctl.h>
   49 #include <sys/systm.h>
   50 #include <sys/turnstile.h>
   51 
   52 #include <machine/cpu.h>
   53 
   54 #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
   55 #define ADAPTIVE_RWLOCKS
   56 #endif
   57 
   58 #ifdef HWPMC_HOOKS
   59 #include <sys/pmckern.h>
   60 PMC_SOFT_DECLARE( , , lock, failed);
   61 #endif
   62 
   63 /*
   64  * Return the rwlock address when the lock cookie address is provided.
   65  * This functionality assumes that struct rwlock has a member named rw_lock.
   66  */
   67 #define rwlock2rw(c)    (__containerof(c, struct rwlock, rw_lock))
   68 
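/*
 * Illustrative sketch (assuming the usual __containerof() definition from
 * sys/cdefs.h): the macro above is roughly equivalent to subtracting the
 * member offset from the cookie address, e.g.
 *
 *	volatile uintptr_t *c;		the cookie, typically &rw->rw_lock
 *	struct rwlock *rw;
 *
 *	rw = (struct rwlock *)((char *)c - offsetof(struct rwlock, rw_lock));
 *
 * which lets the KPI hand out only the lock word's address while the
 * implementation below still recovers the containing structure.
 */
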
   69 #ifdef DDB
   70 #include <ddb/ddb.h>
   71 
   72 static void     db_show_rwlock(const struct lock_object *lock);
   73 #endif
   74 static void     assert_rw(const struct lock_object *lock, int what);
   75 static void     lock_rw(struct lock_object *lock, uintptr_t how);
   76 #ifdef KDTRACE_HOOKS
   77 static int      owner_rw(const struct lock_object *lock, struct thread **owner);
   78 #endif
   79 static uintptr_t unlock_rw(struct lock_object *lock);
   80 
   81 struct lock_class lock_class_rw = {
   82         .lc_name = "rw",
   83         .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
   84         .lc_assert = assert_rw,
   85 #ifdef DDB
   86         .lc_ddb_show = db_show_rwlock,
   87 #endif
   88         .lc_lock = lock_rw,
   89         .lc_unlock = unlock_rw,
   90 #ifdef KDTRACE_HOOKS
   91         .lc_owner = owner_rw,
   92 #endif
   93 };
   94 
   95 #ifdef ADAPTIVE_RWLOCKS
   96 static int __read_frequently rowner_retries;
   97 static int __read_frequently rowner_loops;
   98 static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
   99     "rwlock debugging");
  100 SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
  101 SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
  102 
  103 static struct lock_delay_config __read_frequently rw_delay;
  104 
  105 SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_base, CTLFLAG_RW, &rw_delay.base,
  106     0, "");
  107 SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max,
  108     0, "");
  109 
  110 static void
  111 rw_lock_delay_init(void *arg __unused)
  112 {
  113 
  114         lock_delay_default_init(&rw_delay);
  115         rowner_retries = 10;
  116         rowner_loops = max(10000, rw_delay.max);
  117 }
  118 LOCK_DELAY_SYSINIT(rw_lock_delay_init);
  119 #endif
  120 
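/*
 * The spin parameters above are exported as read-write sysctls, i.e.
 * debug.rwlock.retry, debug.rwlock.loops, debug.rwlock.delay_base and
 * debug.rwlock.delay_max, so they can be tuned at runtime, for instance:
 *
 *	sysctl debug.rwlock.loops=20000
 *
 * Roughly, "retry" caps how many rounds of bounded spinning a contender
 * performs while the lock is held by readers, "loops" caps the length of
 * each round, and the delay values feed lock_delay() for the back-off used
 * while spinning on a running writer.
 */
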
  121 /*
  122  * Return a pointer to the owning thread if the lock is write-locked or
  123  * NULL if the lock is unlocked or read-locked.
  124  */
  125 
  126 #define lv_rw_wowner(v)                                                 \
  127         ((v) & RW_LOCK_READ ? NULL :                                    \
  128          (struct thread *)RW_OWNER((v)))
  129 
  130 #define rw_wowner(rw)   lv_rw_wowner(RW_READ_VALUE(rw))
  131 
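/*
 * An illustrative sketch of the encoding the macros above rely on (the
 * exact flag values are defined in sys/rwlock.h).  An unlocked rwlock is
 * encoded as a read lock with zero readers, so RW_LOCK_READ is set whenever
 * no thread owns the lock exclusively:
 *
 *	uintptr_t v = RW_READ_VALUE(rw);
 *
 *	if (v & RW_LOCK_READ)
 *		nreaders = RW_READERS(v);		read-locked or unlocked
 *	else
 *		owner = (struct thread *)RW_OWNER(v);	write-locked
 *
 * Thread pointers are aligned well past the low flag bits, which is how an
 * owner pointer and the waiter/recursion flags can share a single word.
 */
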
  132 /*
  133  * Return true if the write owner is recursed.  Write ownership is not
  134  * assured here and should be checked beforehand.
  135  */
  136 #define rw_recursed(rw)         ((rw)->rw_recurse != 0)
  137 
  138 /*
  139  * Return true if curthread holds the lock.
  140  */
  141 #define rw_wlocked(rw)          (rw_wowner((rw)) == curthread)
  142 
  143 /*
  144  * Return a pointer to the owning thread for this lock that should receive
  145  * any priority lent by threads that block on this lock.  Currently this
  146  * is identical to rw_wowner().
  147  */
  148 #define rw_owner(rw)            rw_wowner(rw)
  149 
  150 #ifndef INVARIANTS
  151 #define __rw_assert(c, what, file, line)
  152 #endif
  153 
  154 void
  155 assert_rw(const struct lock_object *lock, int what)
  156 {
  157 
  158         rw_assert((const struct rwlock *)lock, what);
  159 }
  160 
  161 void
  162 lock_rw(struct lock_object *lock, uintptr_t how)
  163 {
  164         struct rwlock *rw;
  165 
  166         rw = (struct rwlock *)lock;
  167         if (how)
  168                 rw_rlock(rw);
  169         else
  170                 rw_wlock(rw);
  171 }
  172 
  173 uintptr_t
  174 unlock_rw(struct lock_object *lock)
  175 {
  176         struct rwlock *rw;
  177 
  178         rw = (struct rwlock *)lock;
  179         rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
  180         if (rw->rw_lock & RW_LOCK_READ) {
  181                 rw_runlock(rw);
  182                 return (1);
  183         } else {
  184                 rw_wunlock(rw);
  185                 return (0);
  186         }
  187 }
  188 
  189 #ifdef KDTRACE_HOOKS
  190 int
  191 owner_rw(const struct lock_object *lock, struct thread **owner)
  192 {
  193         const struct rwlock *rw = (const struct rwlock *)lock;
  194         uintptr_t x = rw->rw_lock;
  195 
  196         *owner = rw_wowner(rw);
  197         return ((x & RW_LOCK_READ) != 0 ?  (RW_READERS(x) != 0) :
  198             (*owner != NULL));
  199 }
  200 #endif
  201 
  202 void
  203 _rw_init_flags(volatile uintptr_t *c, const char *name, int opts)
  204 {
  205         struct rwlock *rw;
  206         int flags;
  207 
  208         rw = rwlock2rw(c);
  209 
  210         MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
  211             RW_RECURSE | RW_NEW)) == 0);
  212         ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
  213             ("%s: rw_lock not aligned for %s: %p", __func__, name,
  214             &rw->rw_lock));
  215 
  216         flags = LO_UPGRADABLE;
  217         if (opts & RW_DUPOK)
  218                 flags |= LO_DUPOK;
  219         if (opts & RW_NOPROFILE)
  220                 flags |= LO_NOPROFILE;
  221         if (!(opts & RW_NOWITNESS))
  222                 flags |= LO_WITNESS;
  223         if (opts & RW_RECURSE)
  224                 flags |= LO_RECURSABLE;
  225         if (opts & RW_QUIET)
  226                 flags |= LO_QUIET;
  227         if (opts & RW_NEW)
  228                 flags |= LO_NEW;
  229 
  230         lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
  231         rw->rw_lock = RW_UNLOCKED;
  232         rw->rw_recurse = 0;
  233 }
  234 
  235 void
  236 _rw_destroy(volatile uintptr_t *c)
  237 {
  238         struct rwlock *rw;
  239 
  240         rw = rwlock2rw(c);
  241 
  242         KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
  243         KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
  244         rw->rw_lock = RW_DESTROYED;
  245         lock_destroy(&rw->lock_object);
  246 }
  247 
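/*
 * A minimal usage sketch for the initialization path above, as a
 * hypothetical consumer would call it (the foo_* names are invented for
 * illustration; rwlock(9) documents the KPI):
 *
 *	#include <sys/param.h>
 *	#include <sys/lock.h>
 *	#include <sys/rwlock.h>
 *
 *	static struct rwlock foo_lock;
 *
 *	static void
 *	foo_init(void)
 *	{
 *
 *		rw_init_flags(&foo_lock, "foo lock", RW_RECURSE);
 *	}
 *
 *	static void
 *	foo_fini(void)
 *	{
 *
 *		rw_destroy(&foo_lock);		must be unlocked by now
 *	}
 *
 * rw_init(rw, name) is shorthand for rw_init_flags(rw, name, 0), and
 * RW_SYSINIT() arranges for rw_sysinit() below to run during boot.
 */
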
  248 void
  249 rw_sysinit(void *arg)
  250 {
  251         struct rw_args *args;
  252 
  253         args = arg;
  254         rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
  255             args->ra_flags);
  256 }
  257 
  258 int
  259 _rw_wowned(const volatile uintptr_t *c)
  260 {
  261 
  262         return (rw_wowner(rwlock2rw(c)) == curthread);
  263 }
  264 
  265 void
  266 _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
  267 {
  268         struct rwlock *rw;
  269         uintptr_t tid, v;
  270 
  271         rw = rwlock2rw(c);
  272 
  273         KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
  274             !TD_IS_IDLETHREAD(curthread),
  275             ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
  276             curthread, rw->lock_object.lo_name, file, line));
  277         KASSERT(rw->rw_lock != RW_DESTROYED,
  278             ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
  279         WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
  280             line, NULL);
  281         tid = (uintptr_t)curthread;
  282         v = RW_UNLOCKED;
  283         if (!_rw_write_lock_fetch(rw, &v, tid))
  284                 _rw_wlock_hard(rw, v, file, line);
  285         else
  286                 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw,
  287                     0, 0, file, line, LOCKSTAT_WRITER);
  288 
  289         LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
  290         WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
  291         TD_LOCKS_INC(curthread);
  292 }
  293 
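/*
 * A minimal write-side usage sketch (foo_lock and foo_data are
 * hypothetical).  The common case above is a single atomic swap from
 * RW_UNLOCKED to the owning thread id, with _rw_wlock_hard() handling any
 * contention:
 *
 *	rw_wlock(&foo_lock);
 *	foo_data++;				exclusive access
 *	MPASS(rw_wowned(&foo_lock));
 *	rw_wunlock(&foo_lock);
 */
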
  294 int
  295 __rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
  296 {
  297         struct thread *td;
  298         uintptr_t tid, v;
  299         int rval;
  300         bool recursed;
  301 
  302         td = curthread;
  303         tid = (uintptr_t)td;
  304         if (SCHEDULER_STOPPED_TD(td))
  305                 return (1);
  306 
  307         KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
  308             ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
  309             curthread, rw->lock_object.lo_name, file, line));
  310         KASSERT(rw->rw_lock != RW_DESTROYED,
  311             ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));
  312 
  313         rval = 1;
  314         recursed = false;
  315         v = RW_UNLOCKED;
  316         for (;;) {
  317                 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
  318                         break;
  319                 if (v == RW_UNLOCKED)
  320                         continue;
  321                 if (v == tid && (rw->lock_object.lo_flags & LO_RECURSABLE)) {
  322                         rw->rw_recurse++;
  323                         atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
  324                         break;
  325                 }
  326                 rval = 0;
  327                 break;
  328         }
  329 
  330         LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
  331         if (rval) {
  332                 WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
  333                     file, line);
  334                 if (!recursed)
  335                         LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
  336                             rw, 0, 0, file, line, LOCKSTAT_WRITER);
  337                 TD_LOCKS_INC(curthread);
  338         }
  339         return (rval);
  340 }
  341 
  342 int
  343 __rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
  344 {
  345         struct rwlock *rw;
  346 
  347         rw = rwlock2rw(c);
  348         return (__rw_try_wlock_int(rw LOCK_FILE_LINE_ARG));
  349 }
  350 
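/*
 * A minimal try-lock sketch (hypothetical names).  rw_try_wlock() and
 * rw_try_rlock() never block, so a caller can attempt the acquisition and
 * fall back if the lock is busy:
 *
 *	if (rw_try_wlock(&foo_lock)) {
 *		foo_data++;
 *		rw_wunlock(&foo_lock);
 *	} else {
 *		defer the update or retry later
 *	}
 */
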
  351 void
  352 _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line)
  353 {
  354         struct rwlock *rw;
  355 
  356         rw = rwlock2rw(c);
  357 
  358         KASSERT(rw->rw_lock != RW_DESTROYED,
  359             ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
  360         __rw_assert(c, RA_WLOCKED, file, line);
  361         WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
  362         LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
  363             line);
  364 
  365 #ifdef LOCK_PROFILING
  366         _rw_wunlock_hard(rw, (uintptr_t)curthread, file, line);
  367 #else
  368         __rw_wunlock(rw, curthread, file, line);
  369 #endif
  370 
  371         TD_LOCKS_DEC(curthread);
  372 }
  373 
  374 /*
  375  * Determines whether a new reader can acquire the lock.  Succeeds if the
  376  * reader already owns a read lock and the lock is locked for read, which
  377  * prevents deadlock from reader recursion.  Also succeeds if the lock is
  378  * unlocked or read-locked and has no write waiters or spinners.  Failing
  379  * otherwise gives priority to writers over readers.
  380  */
  381 static bool __always_inline
  382 __rw_can_read(struct thread *td, uintptr_t v, bool fp)
  383 {
  384 
  385         if ((v & (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER))
  386             == RW_LOCK_READ)
  387                 return (true);
  388         if (!fp && td->td_rw_rlocks && (v & RW_LOCK_READ))
  389                 return (true);
  390         return (false);
  391 }
  392 
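/*
 * The policy above is what keeps recursive readers from deadlocking against
 * waiting writers.  A sketch of the interleaving it permits:
 *
 *	thread A: rw_rlock(&foo_lock)	first read lock, td_rw_rlocks > 0
 *	thread B: rw_wlock(&foo_lock)	blocks, sets RW_LOCK_WRITE_WAITERS
 *	thread A: rw_rlock(&foo_lock)	still succeeds via the td_rw_rlocks
 *					check; refusing it would leave A and
 *					B waiting on each other forever
 */
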
  393 static bool __always_inline
  394 __rw_rlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp, bool fp
  395     LOCK_FILE_LINE_ARG_DEF)
  396 {
  397 
  398         /*
  399          * Handle the easy case.  If no other thread has a write
  400          * lock, then try to bump up the count of read locks.  Note
  401          * that we have to preserve the current state of the
  402          * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
  403          * read lock, then rw_lock must have changed, so restart
  404          * the loop.  Note that this handles the case of a
  405          * completely unlocked rwlock since such a lock is encoded
  406          * as a read lock with no waiters.
  407          */
  408         while (__rw_can_read(td, *vp, fp)) {
  409                 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, vp,
  410                         *vp + RW_ONE_READER)) {
  411                         if (LOCK_LOG_TEST(&rw->lock_object, 0))
  412                                 CTR4(KTR_LOCK,
  413                                     "%s: %p succeed %p -> %p", __func__,
  414                                     rw, (void *)*vp,
  415                                     (void *)(*vp + RW_ONE_READER));
  416                         td->td_rw_rlocks++;
  417                         return (true);
  418                 }
  419         }
  420         return (false);
  421 }
  422 
  423 static void __noinline
  424 __rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
  425     LOCK_FILE_LINE_ARG_DEF)
  426 {
  427         struct turnstile *ts;
  428         struct thread *owner;
  429 #ifdef ADAPTIVE_RWLOCKS
  430         int spintries = 0;
  431         int i, n;
  432 #endif
  433 #ifdef LOCK_PROFILING
  434         uint64_t waittime = 0;
  435         int contested = 0;
  436 #endif
  437 #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
  438         struct lock_delay_arg lda;
  439 #endif
  440 #ifdef KDTRACE_HOOKS
  441         u_int sleep_cnt = 0;
  442         int64_t sleep_time = 0;
  443         int64_t all_time = 0;
  444 #endif
  445 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
  446         uintptr_t state;
  447         int doing_lockprof = 0;
  448 #endif
  449 
  450 #ifdef KDTRACE_HOOKS
  451         if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
  452                 if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
  453                         goto out_lockstat;
  454                 doing_lockprof = 1;
  455                 all_time -= lockstat_nsecs(&rw->lock_object);
  456                 state = v;
  457         }
  458 #endif
  459 #ifdef LOCK_PROFILING
  460         doing_lockprof = 1;
  461         state = v;
  462 #endif
  463 
  464         if (SCHEDULER_STOPPED())
  465                 return;
  466 
  467 #if defined(ADAPTIVE_RWLOCKS)
  468         lock_delay_arg_init(&lda, &rw_delay);
  469 #elif defined(KDTRACE_HOOKS)
  470         lock_delay_arg_init(&lda, NULL);
  471 #endif
  472 
  473 #ifdef HWPMC_HOOKS
  474         PMC_SOFT_CALL( , , lock, failed);
  475 #endif
  476         lock_profile_obtain_lock_failed(&rw->lock_object,
  477             &contested, &waittime);
  478 
  479         for (;;) {
  480                 if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
  481                         break;
  482 #ifdef KDTRACE_HOOKS
  483                 lda.spin_cnt++;
  484 #endif
  485 
  486 #ifdef ADAPTIVE_RWLOCKS
  487                 /*
  488                  * If the owner is running on another CPU, spin until
  489                  * the owner stops running or the state of the lock
  490                  * changes.
  491                  */
  492                 if ((v & RW_LOCK_READ) == 0) {
  493                         owner = (struct thread *)RW_OWNER(v);
  494                         if (TD_IS_RUNNING(owner)) {
  495                                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  496                                         CTR3(KTR_LOCK,
  497                                             "%s: spinning on %p held by %p",
  498                                             __func__, rw, owner);
  499                                 KTR_STATE1(KTR_SCHED, "thread",
  500                                     sched_tdname(curthread), "spinning",
  501                                     "lockname:\"%s\"", rw->lock_object.lo_name);
  502                                 do {
  503                                         lock_delay(&lda);
  504                                         v = RW_READ_VALUE(rw);
  505                                         owner = lv_rw_wowner(v);
  506                                 } while (owner != NULL && TD_IS_RUNNING(owner));
  507                                 KTR_STATE0(KTR_SCHED, "thread",
  508                                     sched_tdname(curthread), "running");
  509                                 continue;
  510                         }
  511                 } else if (spintries < rowner_retries) {
  512                         spintries++;
  513                         KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
  514                             "spinning", "lockname:\"%s\"",
  515                             rw->lock_object.lo_name);
  516                         for (i = 0; i < rowner_loops; i += n) {
  517                                 n = RW_READERS(v);
  518                                 lock_delay_spin(n);
  519                                 v = RW_READ_VALUE(rw);
  520                                 if ((v & RW_LOCK_READ) == 0 || __rw_can_read(td, v, false))
  521                                         break;
  522                         }
  523 #ifdef KDTRACE_HOOKS
  524                         lda.spin_cnt += rowner_loops - i;
  525 #endif
  526                         KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
  527                             "running");
  528                         if (i < rowner_loops)
  529                                 continue;
  530                 }
  531 #endif
  532 
  533                 /*
  534                  * Okay, now it's the hard case.  Some other thread already
  535                  * has a write lock or there are write waiters present;
  536                  * acquire the turnstile lock so we can begin the process
  537                  * of blocking.
  538                  */
  539                 ts = turnstile_trywait(&rw->lock_object);
  540 
  541                 /*
  542                  * The lock might have been released while we spun, so
  543                  * recheck its state and restart the loop if needed.
  544                  */
  545                 v = RW_READ_VALUE(rw);
  546 retry_ts:
  547                 if (__rw_can_read(td, v, false)) {
  548                         turnstile_cancel(ts);
  549                         continue;
  550                 }
  551 
  552                 owner = lv_rw_wowner(v);
  553 
  554 #ifdef ADAPTIVE_RWLOCKS
  555                 /*
  556                  * The current lock owner might have started executing
  557                  * on another CPU (or the lock could have changed
  558                  * owners) while we were waiting on the turnstile
  559                  * chain lock.  If so, drop the turnstile lock and try
  560                  * again.
  561                  */
  562                 if (owner != NULL) {
  563                         if (TD_IS_RUNNING(owner)) {
  564                                 turnstile_cancel(ts);
  565                                 continue;
  566                         }
  567                 }
  568 #endif
  569 
  570                 /*
  571                  * The lock is held in write mode or it already has waiters.
  572                  */
  573                 MPASS(!__rw_can_read(td, v, false));
  574 
  575                 /*
  576                  * If the RW_LOCK_READ_WAITERS flag is already set, then
  577                  * we can go ahead and block.  If it is not set then try
  578                  * to set it.  If we fail to set it drop the turnstile
  579                  * lock and restart the loop.
  580                  */
  581                 if (!(v & RW_LOCK_READ_WAITERS)) {
  582                         if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
  583                             v | RW_LOCK_READ_WAITERS))
  584                                 goto retry_ts;
  585                         if (LOCK_LOG_TEST(&rw->lock_object, 0))
  586                                 CTR2(KTR_LOCK, "%s: %p set read waiters flag",
  587                                     __func__, rw);
  588                 }
  589 
  590                 /*
  591                  * We were unable to acquire the lock and the read waiters
  592                  * flag is set, so we must block on the turnstile.
  593                  */
  594                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  595                         CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
  596                             rw);
  597 #ifdef KDTRACE_HOOKS
  598                 sleep_time -= lockstat_nsecs(&rw->lock_object);
  599 #endif
  600                 MPASS(owner == rw_owner(rw));
  601                 turnstile_wait(ts, owner, TS_SHARED_QUEUE);
  602 #ifdef KDTRACE_HOOKS
  603                 sleep_time += lockstat_nsecs(&rw->lock_object);
  604                 sleep_cnt++;
  605 #endif
  606                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  607                         CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
  608                             __func__, rw);
  609                 v = RW_READ_VALUE(rw);
  610         }
  611 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
  612         if (__predict_true(!doing_lockprof))
  613                 return;
  614 #endif
  615 #ifdef KDTRACE_HOOKS
  616         all_time += lockstat_nsecs(&rw->lock_object);
  617         if (sleep_time)
  618                 LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
  619                     LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
  620                     (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
  621 
  622         /* Record only the loops spinning and not sleeping. */
  623         if (lda.spin_cnt > sleep_cnt)
  624                 LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
  625                     LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
  626                     (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
  627 out_lockstat:
  628 #endif
  629         /*
  630          * TODO: acquire "owner of record" here.  Here be turnstile dragons
  631          * however.  turnstiles don't like owners changing between calls to
  632          * turnstile_wait() currently.
  633          */
  634         LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
  635             waittime, file, line, LOCKSTAT_READER);
  636 }
  637 
  638 void
  639 __rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
  640 {
  641         struct thread *td;
  642         uintptr_t v;
  643 
  644         td = curthread;
  645 
  646         KASSERT(kdb_active != 0 || SCHEDULER_STOPPED_TD(td) ||
  647             !TD_IS_IDLETHREAD(td),
  648             ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
  649             td, rw->lock_object.lo_name, file, line));
  650         KASSERT(rw->rw_lock != RW_DESTROYED,
  651             ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
  652         KASSERT(rw_wowner(rw) != td,
  653             ("rw_rlock: wlock already held for %s @ %s:%d",
  654             rw->lock_object.lo_name, file, line));
  655         WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);
  656 
  657         v = RW_READ_VALUE(rw);
  658         if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) ||
  659             !__rw_rlock_try(rw, td, &v, true LOCK_FILE_LINE_ARG)))
  660                 __rw_rlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
  661         else
  662                 lock_profile_obtain_lock_success(&rw->lock_object, 0, 0,
  663                     file, line);
  664 
  665         LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
  666         WITNESS_LOCK(&rw->lock_object, 0, file, line);
  667         TD_LOCKS_INC(curthread);
  668 }
  669 
  670 void
  671 __rw_rlock(volatile uintptr_t *c, const char *file, int line)
  672 {
  673         struct rwlock *rw;
  674 
  675         rw = rwlock2rw(c);
  676         __rw_rlock_int(rw LOCK_FILE_LINE_ARG);
  677 }
  678 
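/*
 * A minimal read-side usage sketch (hypothetical names).  Multiple threads
 * may hold read locks concurrently, and a thread that already holds one may
 * acquire another even while writers are waiting (see __rw_can_read()
 * above):
 *
 *	rw_rlock(&foo_lock);
 *	value = foo_data;			shared, read-only access
 *	rw_runlock(&foo_lock);
 */
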
  679 int
  680 __rw_try_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
  681 {
  682         uintptr_t x;
  683 
  684         if (SCHEDULER_STOPPED())
  685                 return (1);
  686 
  687         KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
  688             ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
  689             curthread, rw->lock_object.lo_name, file, line));
  690 
  691         x = rw->rw_lock;
  692         for (;;) {
  693                 KASSERT(rw->rw_lock != RW_DESTROYED,
  694                     ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
  695                 if (!(x & RW_LOCK_READ))
  696                         break;
  697                 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &x, x + RW_ONE_READER)) {
  698                         LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
  699                             line);
  700                         WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
  701                         LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
  702                             rw, 0, 0, file, line, LOCKSTAT_READER);
  703                         TD_LOCKS_INC(curthread);
  704                         curthread->td_rw_rlocks++;
  705                         return (1);
  706                 }
  707         }
  708 
  709         LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
  710         return (0);
  711 }
  712 
  713 int
  714 __rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
  715 {
  716         struct rwlock *rw;
  717 
  718         rw = rwlock2rw(c);
  719         return (__rw_try_rlock_int(rw LOCK_FILE_LINE_ARG));
  720 }
  721 
  722 static bool __always_inline
  723 __rw_runlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp)
  724 {
  725 
  726         for (;;) {
  727                 /*
  728                  * See if there is more than one read lock held.  If so,
  729                  * just drop one and return.
  730                  */
  731                 if (RW_READERS(*vp) > 1) {
  732                         if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
  733                             *vp - RW_ONE_READER)) {
  734                                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  735                                         CTR4(KTR_LOCK,
  736                                             "%s: %p succeeded %p -> %p",
  737                                             __func__, rw, (void *)*vp,
  738                                             (void *)(*vp - RW_ONE_READER));
  739                                 td->td_rw_rlocks--;
  740                                 return (true);
  741                         }
  742                         continue;
  743                 }
  744                 /*
  745                  * If there aren't any waiters for a write lock, then try
  746                  * to drop it quickly.
  747                  */
  748                 if (!(*vp & RW_LOCK_WAITERS)) {
  749                         MPASS((*vp & ~RW_LOCK_WRITE_SPINNER) ==
  750                             RW_READERS_LOCK(1));
  751                         if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
  752                             RW_UNLOCKED)) {
  753                                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  754                                         CTR2(KTR_LOCK, "%s: %p last succeeded",
  755                                             __func__, rw);
  756                                 td->td_rw_rlocks--;
  757                                 return (true);
  758                         }
  759                         continue;
  760                 }
  761                 break;
  762         }
  763         return (false);
  764 }
  765 
  766 static void __noinline
  767 __rw_runlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
  768     LOCK_FILE_LINE_ARG_DEF)
  769 {
  770         struct turnstile *ts;
  771         uintptr_t setv, queue;
  772 
  773         if (SCHEDULER_STOPPED())
  774                 return;
  775 
  776         if (__rw_runlock_try(rw, td, &v))
  777                 goto out_lockstat;
  778 
  779         /*
  780          * Ok, we know we have waiters and we think we are the
  781          * last reader, so grab the turnstile lock.
  782          */
  783         turnstile_chain_lock(&rw->lock_object);
  784         v = RW_READ_VALUE(rw);
  785         for (;;) {
  786                 if (__rw_runlock_try(rw, td, &v))
  787                         break;
  788 
  789                 v &= (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
  790                 MPASS(v & RW_LOCK_WAITERS);
  791 
  792                 /*
  793                  * Try to drop our lock leaving the lock in an unlocked
  794                  * state.
  795                  *
  796                  * If you wanted to do explicit lock handoff you'd have to
  797                  * do it here.  You'd also want to use turnstile_signal()
  798                  * and you'd have to handle the race where a higher
  799                  * priority thread blocks on the write lock before the
  800                  * thread you wake up actually runs and have the new thread
  801                  * "steal" the lock.  For now it's a lot simpler to just
  802                  * wake up all of the waiters.
  803                  *
  804                  * As above, if we fail, then another thread might have
  805                  * acquired a read lock, so drop the turnstile lock and
  806                  * restart.
  807                  */
  808                 setv = RW_UNLOCKED;
  809                 queue = TS_SHARED_QUEUE;
  810                 if (v & RW_LOCK_WRITE_WAITERS) {
  811                         queue = TS_EXCLUSIVE_QUEUE;
  812                         setv |= (v & RW_LOCK_READ_WAITERS);
  813                 }
  814                 v |= RW_READERS_LOCK(1);
  815                 if (!atomic_fcmpset_rel_ptr(&rw->rw_lock, &v, setv))
  816                         continue;
  817                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  818                         CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
  819                             __func__, rw);
  820 
  821                 /*
  822                  * Ok.  The lock is released and all that's left is to
  823                  * wake up the waiters.  Note that the lock might not be
  824                  * free anymore, but in that case the writers will just
  825                  * block again if they run before the new lock holder(s)
  826                  * release the lock.
  827                  */
  828                 ts = turnstile_lookup(&rw->lock_object);
  829                 MPASS(ts != NULL);
  830                 turnstile_broadcast(ts, queue);
  831                 turnstile_unpend(ts, TS_SHARED_LOCK);
  832                 td->td_rw_rlocks--;
  833                 break;
  834         }
  835         turnstile_chain_unlock(&rw->lock_object);
  836 out_lockstat:
  837         LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER);
  838 }
  839 
  840 void
  841 _rw_runlock_cookie_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
  842 {
  843         struct thread *td;
  844         uintptr_t v;
  845 
  846         KASSERT(rw->rw_lock != RW_DESTROYED,
  847             ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
  848         __rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);
  849         WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
  850         LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);
  851 
  852         td = curthread;
  853         v = RW_READ_VALUE(rw);
  854 
  855         if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) ||
  856             !__rw_runlock_try(rw, td, &v)))
  857                 __rw_runlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
  858         else
  859                 lock_profile_release_lock(&rw->lock_object);
  860 
  861         TD_LOCKS_DEC(curthread);
  862 }
  863 
  864 void
  865 _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
  866 {
  867         struct rwlock *rw;
  868 
  869         rw = rwlock2rw(c);
  870         _rw_runlock_cookie_int(rw LOCK_FILE_LINE_ARG);
  871 }
  872 
  873 /*
  874  * This function is called when we are unable to obtain a write lock on the
  875  * first try.  This means that at least one other thread holds either a
  876  * read or write lock.
  877  */
  878 void
  879 __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
  880 {
  881         uintptr_t tid;
  882         struct rwlock *rw;
  883         struct turnstile *ts;
  884         struct thread *owner;
  885 #ifdef ADAPTIVE_RWLOCKS
  886         int spintries = 0;
  887         int i, n;
  888         enum { READERS, WRITER } sleep_reason = READERS;
  889 #endif
  890         uintptr_t x;
  891 #ifdef LOCK_PROFILING
  892         uint64_t waittime = 0;
  893         int contested = 0;
  894 #endif
  895 #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
  896         struct lock_delay_arg lda;
  897 #endif
  898 #ifdef KDTRACE_HOOKS
  899         u_int sleep_cnt = 0;
  900         int64_t sleep_time = 0;
  901         int64_t all_time = 0;
  902 #endif
  903 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
  904         uintptr_t state;
  905         int doing_lockprof = 0;
  906 #endif
  907 
  908         tid = (uintptr_t)curthread;
  909         rw = rwlock2rw(c);
  910 
  911 #ifdef KDTRACE_HOOKS
  912         if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
  913                 while (v == RW_UNLOCKED) {
  914                         if (_rw_write_lock_fetch(rw, &v, tid))
  915                                 goto out_lockstat;
  916                 }
  917                 doing_lockprof = 1;
  918                 all_time -= lockstat_nsecs(&rw->lock_object);
  919                 state = v;
  920         }
  921 #endif
  922 #ifdef LOCK_PROFILING
  923         doing_lockprof = 1;
  924         state = v;
  925 #endif
  926 
  927         if (SCHEDULER_STOPPED())
  928                 return;
  929 
  930 #if defined(ADAPTIVE_RWLOCKS)
  931         lock_delay_arg_init(&lda, &rw_delay);
  932 #elif defined(KDTRACE_HOOKS)
  933         lock_delay_arg_init(&lda, NULL);
  934 #endif
  935         if (__predict_false(v == RW_UNLOCKED))
  936                 v = RW_READ_VALUE(rw);
  937 
  938         if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) {
  939                 KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
  940                     ("%s: recursing but non-recursive rw %s @ %s:%d\n",
  941                     __func__, rw->lock_object.lo_name, file, line));
  942                 rw->rw_recurse++;
  943                 atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
  944                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  945                         CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
  946                 return;
  947         }
  948 
  949         if (LOCK_LOG_TEST(&rw->lock_object, 0))
  950                 CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
  951                     rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
  952 
  953 #ifdef HWPMC_HOOKS
  954         PMC_SOFT_CALL( , , lock, failed);
  955 #endif
  956         lock_profile_obtain_lock_failed(&rw->lock_object,
  957             &contested, &waittime);
  958 
  959         for (;;) {
  960                 if (v == RW_UNLOCKED) {
  961                         if (_rw_write_lock_fetch(rw, &v, tid))
  962                                 break;
  963                         continue;
  964                 }
  965 #ifdef KDTRACE_HOOKS
  966                 lda.spin_cnt++;
  967 #endif
  968 
  969 #ifdef ADAPTIVE_RWLOCKS
  970                 /*
  971                  * If the lock is write locked and the owner is
  972                  * running on another CPU, spin until the owner stops
  973                  * running or the state of the lock changes.
  974                  */
  975                 if (!(v & RW_LOCK_READ)) {
  976                         sleep_reason = WRITER;
  977                         owner = lv_rw_wowner(v);
  978                         if (!TD_IS_RUNNING(owner))
  979                                 goto ts;
  980                         if (LOCK_LOG_TEST(&rw->lock_object, 0))
  981                                 CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
  982                                     __func__, rw, owner);
  983                         KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
  984                             "spinning", "lockname:\"%s\"",
  985                             rw->lock_object.lo_name);
  986                         do {
  987                                 lock_delay(&lda);
  988                                 v = RW_READ_VALUE(rw);
  989                                 owner = lv_rw_wowner(v);
  990                         } while (owner != NULL && TD_IS_RUNNING(owner));
  991                         KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
  992                             "running");
  993                         continue;
  994                 } else if (RW_READERS(v) > 0) {
  995                         sleep_reason = READERS;
  996                         if (spintries == rowner_retries)
  997                                 goto ts;
  998                         if (!(v & RW_LOCK_WRITE_SPINNER)) {
  999                                 if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
 1000                                     v | RW_LOCK_WRITE_SPINNER)) {
 1001                                         continue;
 1002                                 }
 1003                         }
 1004                         spintries++;
 1005                         KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 1006                             "spinning", "lockname:\"%s\"",
 1007                             rw->lock_object.lo_name);
 1008                         for (i = 0; i < rowner_loops; i += n) {
 1009                                 n = RW_READERS(v);
 1010                                 lock_delay_spin(n);
 1011                                 v = RW_READ_VALUE(rw);
 1012                                 if ((v & RW_LOCK_WRITE_SPINNER) == 0)
 1013                                         break;
 1014                         }
 1015 #ifdef KDTRACE_HOOKS
 1016                         lda.spin_cnt += i;
 1017 #endif
 1018                         KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
 1019                             "running");
 1020                         if (i < rowner_loops)
 1021                                 continue;
 1022                 }
 1023 ts:
 1024 #endif
 1025                 ts = turnstile_trywait(&rw->lock_object);
 1026                 v = RW_READ_VALUE(rw);
 1027 retry_ts:
 1028                 owner = lv_rw_wowner(v);
 1029 
 1030 #ifdef ADAPTIVE_RWLOCKS
 1031                 /*
 1032                  * The current lock owner might have started executing
 1033                  * on another CPU (or the lock could have changed
 1034                  * owners) while we were waiting on the turnstile
 1035                  * chain lock.  If so, drop the turnstile lock and try
 1036                  * again.
 1037                  */
 1038                 if (owner != NULL) {
 1039                         if (TD_IS_RUNNING(owner)) {
 1040                                 turnstile_cancel(ts);
 1041                                 continue;
 1042                         }
 1043                 } else if (RW_READERS(v) > 0 && sleep_reason == WRITER) {
 1044                         turnstile_cancel(ts);
 1045                         continue;
 1046                 }
 1047 #endif
 1048                 /*
 1049                  * Check the waiter flags on this rwlock.  If the lock
 1050                  * was released without leaving any pending waiters
 1051                  * queued, simply try to acquire it.  If a queue of
 1052                  * pending waiters is present, claim the lock ownership
 1053                  * and preserve the pending queue.
 1054                  */
 1055                 x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
 1056                 if ((v & ~x) == RW_UNLOCKED) {
 1057                         x &= ~RW_LOCK_WRITE_SPINNER;
 1058                         if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid | x)) {
 1059                                 if (x)
 1060                                         turnstile_claim(ts);
 1061                                 else
 1062                                         turnstile_cancel(ts);
 1063                                 break;
 1064                         }
 1065                         goto retry_ts;
 1066                 }
 1067                 /*
 1068                  * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
 1069                  * set it.  If we fail to set it, then loop back and try
 1070                  * again.
 1071                  */
 1072                 if (!(v & RW_LOCK_WRITE_WAITERS)) {
 1073                         if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
 1074                             v | RW_LOCK_WRITE_WAITERS))
 1075                                 goto retry_ts;
 1076                         if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1077                                 CTR2(KTR_LOCK, "%s: %p set write waiters flag",
 1078                                     __func__, rw);
 1079                 }
 1080                 /*
 1081                  * We were unable to acquire the lock and the write waiters
 1082                  * flag is set, so we must block on the turnstile.
 1083                  */
 1084                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1085                         CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
 1086                             rw);
 1087 #ifdef KDTRACE_HOOKS
 1088                 sleep_time -= lockstat_nsecs(&rw->lock_object);
 1089 #endif
 1090                 MPASS(owner == rw_owner(rw));
 1091                 turnstile_wait(ts, owner, TS_EXCLUSIVE_QUEUE);
 1092 #ifdef KDTRACE_HOOKS
 1093                 sleep_time += lockstat_nsecs(&rw->lock_object);
 1094                 sleep_cnt++;
 1095 #endif
 1096                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1097                         CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
 1098                             __func__, rw);
 1099 #ifdef ADAPTIVE_RWLOCKS
 1100                 spintries = 0;
 1101 #endif
 1102                 v = RW_READ_VALUE(rw);
 1103         }
 1104 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 1105         if (__predict_true(!doing_lockprof))
 1106                 return;
 1107 #endif
 1108 #ifdef KDTRACE_HOOKS
 1109         all_time += lockstat_nsecs(&rw->lock_object);
 1110         if (sleep_time)
 1111                 LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
 1112                     LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
 1113                     (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 1114 
 1115         /* Record only the loops spinning and not sleeping. */
 1116         if (lda.spin_cnt > sleep_cnt)
 1117                 LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
 1118                     LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
 1119                     (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 1120 out_lockstat:
 1121 #endif
 1122         LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
 1123             waittime, file, line, LOCKSTAT_WRITER);
 1124 }
 1125 
 1126 /*
 1127  * This function is called if lockstat is active or the first try at releasing
 1128  * a write lock failed.  The latter means that the lock is recursed or one of
 1129  * the two waiter bits is set, indicating that at least one thread is waiting
 1130  * on this lock.
 1131  */
 1132 void
 1133 __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
 1134 {
 1135         struct rwlock *rw;
 1136         struct turnstile *ts;
 1137         uintptr_t tid, setv;
 1138         int queue;
 1139 
 1140         tid = (uintptr_t)curthread;
 1141         if (SCHEDULER_STOPPED())
 1142                 return;
 1143 
 1144         rw = rwlock2rw(c);
 1145         if (__predict_false(v == tid))
 1146                 v = RW_READ_VALUE(rw);
 1147 
 1148         if (v & RW_LOCK_WRITER_RECURSED) {
 1149                 if (--(rw->rw_recurse) == 0)
 1150                         atomic_clear_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
 1151                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1152                         CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
 1153                 return;
 1154         }
 1155 
 1156         LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_WRITER);
 1157         if (v == tid && _rw_write_unlock(rw, tid))
 1158                 return;
 1159 
 1160         KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
 1161             ("%s: neither of the waiter flags are set", __func__));
 1162 
 1163         if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1164                 CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);
 1165 
 1166         turnstile_chain_lock(&rw->lock_object);
 1167 
 1168         /*
 1169          * Use the same algo as sx locks for now.  Prefer waking up shared
 1170  * waiters, if we have any, over writers.  This is probably not ideal.
 1171          *
 1172          * 'v' is the value we are going to write back to rw_lock.  If we
 1173          * have waiters on both queues, we need to preserve the state of
 1174          * the waiter flag for the queue we don't wake up.  For now this is
 1175          * hardcoded for the algorithm mentioned above.
 1176          *
 1177          * In the case of both readers and writers waiting we wakeup the
 1178          * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
 1179          * new writer comes in before a reader it will claim the lock up
 1180          * above.  There is probably a potential priority inversion in
 1181          * there that could be worked around either by waking both queues
 1182          * of waiters or doing some complicated lock handoff gymnastics.
 1183          */
 1184         setv = RW_UNLOCKED;
 1185         v = RW_READ_VALUE(rw);
 1186         queue = TS_SHARED_QUEUE;
 1187         if (v & RW_LOCK_WRITE_WAITERS) {
 1188                 queue = TS_EXCLUSIVE_QUEUE;
 1189                 setv |= (v & RW_LOCK_READ_WAITERS);
 1190         }
 1191         atomic_store_rel_ptr(&rw->rw_lock, setv);
 1192 
 1193         /* Wake up all waiters for the specific queue. */
 1194         if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1195                 CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
 1196                     queue == TS_SHARED_QUEUE ? "read" : "write");
 1197 
 1198         ts = turnstile_lookup(&rw->lock_object);
 1199         MPASS(ts != NULL);
 1200         turnstile_broadcast(ts, queue);
 1201         turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
 1202         turnstile_chain_unlock(&rw->lock_object);
 1203 }
 1204 
 1205 /*
 1206  * Attempt to do a non-blocking upgrade from a read lock to a write
 1207  * lock.  This will only succeed if this thread holds a single read
 1208  * lock.  Returns true if the upgrade succeeded and false otherwise.
 1209  */
 1210 int
 1211 __rw_try_upgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 1212 {
 1213         uintptr_t v, setv, tid;
 1214         struct turnstile *ts;
 1215         int success;
 1216 
 1217         if (SCHEDULER_STOPPED())
 1218                 return (1);
 1219 
 1220         KASSERT(rw->rw_lock != RW_DESTROYED,
 1221             ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
 1222         __rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);
 1223 
 1224         /*
 1225          * Attempt to switch from one reader to a writer.  If there
 1226          * are any write waiters, then we will have to lock the
 1227          * turnstile first to prevent races with another writer
 1228          * calling turnstile_wait() before we have claimed this
 1229          * turnstile.  So, do the simple case of no waiters first.
 1230          */
 1231         tid = (uintptr_t)curthread;
 1232         success = 0;
 1233         v = RW_READ_VALUE(rw);
 1234         for (;;) {
 1235                 if (RW_READERS(v) > 1)
 1236                         break;
 1237                 if (!(v & RW_LOCK_WAITERS)) {
 1238                         success = atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid);
 1239                         if (!success)
 1240                                 continue;
 1241                         break;
 1242                 }
 1243 
 1244                 /*
 1245                  * Ok, we think we have waiters, so lock the turnstile.
 1246                  */
 1247                 ts = turnstile_trywait(&rw->lock_object);
 1248                 v = RW_READ_VALUE(rw);
 1249 retry_ts:
 1250                 if (RW_READERS(v) > 1) {
 1251                         turnstile_cancel(ts);
 1252                         break;
 1253                 }
 1254                 /*
 1255                  * Try to switch from one reader to a writer again.  This time
 1256                  * we honor the current state of the waiters flags.
 1257                  * If we obtain the lock with the flags set, then claim
 1258                  * ownership of the turnstile.
 1259                  */
 1260                 setv = tid | (v & RW_LOCK_WAITERS);
 1261                 success = atomic_fcmpset_ptr(&rw->rw_lock, &v, setv);
 1262                 if (success) {
 1263                         if (v & RW_LOCK_WAITERS)
 1264                                 turnstile_claim(ts);
 1265                         else
 1266                                 turnstile_cancel(ts);
 1267                         break;
 1268                 }
 1269                 goto retry_ts;
 1270         }
 1271         LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
 1272         if (success) {
 1273                 curthread->td_rw_rlocks--;
 1274                 WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 1275                     file, line);
 1276                 LOCKSTAT_RECORD0(rw__upgrade, rw);
 1277         }
 1278         return (success);
 1279 }
 1280 
 1281 int
 1282 __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line)
 1283 {
 1284         struct rwlock *rw;
 1285 
 1286         rw = rwlock2rw(c);
 1287         return (__rw_try_upgrade_int(rw LOCK_FILE_LINE_ARG));
 1288 }
 1289 
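/*
 * A sketch of the usual lookup-then-modify pattern built on the upgrade
 * primitive (hypothetical names).  rw_try_upgrade() can only succeed while
 * the caller's read lock is the sole one outstanding, and on failure the
 * read lock is still held, so the fallback must drop it, relock as a writer
 * and revalidate:
 *
 *	rw_rlock(&foo_lock);
 *	obj = foo_lookup(key);
 *	if (obj == NULL) {
 *		rw_runlock(&foo_lock);
 *		return;
 *	}
 *	if (!rw_try_upgrade(&foo_lock)) {
 *		rw_runlock(&foo_lock);
 *		rw_wlock(&foo_lock);
 *		obj = foo_lookup(key);		the lock was dropped; redo it
 *	}
 *	if (obj != NULL)
 *		foo_modify(obj);		now write-locked
 *	rw_downgrade(&foo_lock);		back to a single read lock
 *	...
 *	rw_runlock(&foo_lock);
 */
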
 1290 /*
 1291  * Downgrade a write lock into a single read lock.
 1292  */
 1293 void
 1294 __rw_downgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 1295 {
 1296         struct turnstile *ts;
 1297         uintptr_t tid, v;
 1298         int rwait, wwait;
 1299 
 1300         if (SCHEDULER_STOPPED())
 1301                 return;
 1302 
 1303         KASSERT(rw->rw_lock != RW_DESTROYED,
 1304             ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
 1305         __rw_assert(&rw->rw_lock, RA_WLOCKED | RA_NOTRECURSED, file, line);
 1306 #ifndef INVARIANTS
 1307         if (rw_recursed(rw))
 1308                 panic("downgrade of a recursed lock");
 1309 #endif
 1310 
 1311         WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);
 1312 
 1313         /*
 1314          * Convert from a writer to a single reader.  First we handle
 1315          * the easy case with no waiters.  If there are any waiters, we
 1316          * lock the turnstile and "disown" the lock.
 1317          */
 1318         tid = (uintptr_t)curthread;
 1319         if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
 1320                 goto out;
 1321 
 1322         /*
 1323          * Ok, we think we have waiters, so lock the turnstile so we can
 1324          * read the waiter flags without any races.
 1325          */
 1326         turnstile_chain_lock(&rw->lock_object);
 1327         v = rw->rw_lock & RW_LOCK_WAITERS;
 1328         rwait = v & RW_LOCK_READ_WAITERS;
 1329         wwait = v & RW_LOCK_WRITE_WAITERS;
 1330         MPASS(rwait | wwait);
 1331 
 1332         /*
 1333          * Downgrade from a write lock while preserving waiters flag
 1334          * and give up ownership of the turnstile.
 1335          */
 1336         ts = turnstile_lookup(&rw->lock_object);
 1337         MPASS(ts != NULL);
 1338         if (!wwait)
 1339                 v &= ~RW_LOCK_READ_WAITERS;
 1340         atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
 1341         /*
 1342          * Wake other readers if there are no writers pending.  Otherwise they
 1343          * won't be able to acquire the lock anyway.
 1344          */
 1345         if (rwait && !wwait) {
 1346                 turnstile_broadcast(ts, TS_SHARED_QUEUE);
 1347                 turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
 1348         } else
 1349                 turnstile_disown(ts);
 1350         turnstile_chain_unlock(&rw->lock_object);
 1351 out:
 1352         curthread->td_rw_rlocks++;
 1353         LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
 1354         LOCKSTAT_RECORD0(rw__downgrade, rw);
 1355 }
 1356 
 1357 void
 1358 __rw_downgrade(volatile uintptr_t *c, const char *file, int line)
 1359 {
 1360         struct rwlock *rw;
 1361 
 1362         rw = rwlock2rw(c);
 1363         __rw_downgrade_int(rw LOCK_FILE_LINE_ARG);
 1364 }
 1365 
 1366 #ifdef INVARIANT_SUPPORT
 1367 #ifndef INVARIANTS
 1368 #undef __rw_assert
 1369 #endif
 1370 
 1371 /*
 1372  * In the non-WITNESS case, rw_assert() can only detect that at least
 1373  * *some* thread owns an rlock, but it cannot guarantee that *this*
 1374  * thread owns an rlock.
 1375  */
 1376 void
 1377 __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
 1378 {
 1379         const struct rwlock *rw;
 1380 
 1381         if (panicstr != NULL)
 1382                 return;
 1383 
 1384         rw = rwlock2rw(c);
 1385 
 1386         switch (what) {
 1387         case RA_LOCKED:
 1388         case RA_LOCKED | RA_RECURSED:
 1389         case RA_LOCKED | RA_NOTRECURSED:
 1390         case RA_RLOCKED:
 1391         case RA_RLOCKED | RA_RECURSED:
 1392         case RA_RLOCKED | RA_NOTRECURSED:
 1393 #ifdef WITNESS
 1394                 witness_assert(&rw->lock_object, what, file, line);
 1395 #else
 1396                 /*
 1397                  * If some other thread has a write lock or we have one
 1398                  * and are asserting a read lock, fail.  Also, if no one
 1399                  * has a lock at all, fail.
 1400                  */
 1401                 if (rw->rw_lock == RW_UNLOCKED ||
 1402                     (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED ||
 1403                     rw_wowner(rw) != curthread)))
 1404                         panic("Lock %s not %slocked @ %s:%d\n",
 1405                             rw->lock_object.lo_name, (what & RA_RLOCKED) ?
 1406                             "read " : "", file, line);
 1407 
 1408                 if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) {
 1409                         if (rw_recursed(rw)) {
 1410                                 if (what & RA_NOTRECURSED)
 1411                                         panic("Lock %s recursed @ %s:%d\n",
 1412                                             rw->lock_object.lo_name, file,
 1413                                             line);
 1414                         } else if (what & RA_RECURSED)
 1415                                 panic("Lock %s not recursed @ %s:%d\n",
 1416                                     rw->lock_object.lo_name, file, line);
 1417                 }
 1418 #endif
 1419                 break;
 1420         case RA_WLOCKED:
 1421         case RA_WLOCKED | RA_RECURSED:
 1422         case RA_WLOCKED | RA_NOTRECURSED:
 1423                 if (rw_wowner(rw) != curthread)
 1424                         panic("Lock %s not exclusively locked @ %s:%d\n",
 1425                             rw->lock_object.lo_name, file, line);
 1426                 if (rw_recursed(rw)) {
 1427                         if (what & RA_NOTRECURSED)
 1428                                 panic("Lock %s recursed @ %s:%d\n",
 1429                                     rw->lock_object.lo_name, file, line);
 1430                 } else if (what & RA_RECURSED)
 1431                         panic("Lock %s not recursed @ %s:%d\n",
 1432                             rw->lock_object.lo_name, file, line);
 1433                 break;
 1434         case RA_UNLOCKED:
 1435 #ifdef WITNESS
 1436                 witness_assert(&rw->lock_object, what, file, line);
 1437 #else
 1438                 /*
 1439                  * If we hold a write lock, fail.  We can't reliably check
 1440                  * to see if we hold a read lock or not.
 1441                  */
 1442                 if (rw_wowner(rw) == curthread)
 1443                         panic("Lock %s exclusively locked @ %s:%d\n",
 1444                             rw->lock_object.lo_name, file, line);
 1445 #endif
 1446                 break;
 1447         default:
 1448                 panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
 1449                     line);
 1450         }
 1451 }
 1452 #endif /* INVARIANT_SUPPORT */
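
/*
 * Typical assertion usage in a consumer (hypothetical names).  As noted
 * above, without WITNESS the RA_LOCKED/RA_RLOCKED cases can only prove that
 * some thread holds a read lock, not that the caller does:
 *
 *	static void
 *	foo_modify(struct foo *fp)
 *	{
 *
 *		rw_assert(&foo_lock, RA_WLOCKED);
 *		fp->foo_refs++;
 *	}
 *
 * Under INVARIANTS a failed assertion panics; in kernels built without
 * INVARIANTS rw_assert() expands to nothing.
 */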
 1453 
 1454 #ifdef DDB
 1455 void
 1456 db_show_rwlock(const struct lock_object *lock)
 1457 {
 1458         const struct rwlock *rw;
 1459         struct thread *td;
 1460 
 1461         rw = (const struct rwlock *)lock;
 1462 
 1463         db_printf(" state: ");
 1464         if (rw->rw_lock == RW_UNLOCKED)
 1465                 db_printf("UNLOCKED\n");
 1466         else if (rw->rw_lock == RW_DESTROYED) {
 1467                 db_printf("DESTROYED\n");
 1468                 return;
 1469         } else if (rw->rw_lock & RW_LOCK_READ)
 1470                 db_printf("RLOCK: %ju locks\n",
 1471                     (uintmax_t)(RW_READERS(rw->rw_lock)));
 1472         else {
 1473                 td = rw_wowner(rw);
 1474                 db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
 1475                     td->td_tid, td->td_proc->p_pid, td->td_name);
 1476                 if (rw_recursed(rw))
 1477                         db_printf(" recursed: %u\n", rw->rw_recurse);
 1478         }
 1479         db_printf(" waiters: ");
 1480         switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
 1481         case RW_LOCK_READ_WAITERS:
 1482                 db_printf("readers\n");
 1483                 break;
 1484         case RW_LOCK_WRITE_WAITERS:
 1485                 db_printf("writers\n");
 1486                 break;
 1487         case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
 1488                 db_printf("readers and writers\n");
 1489                 break;
 1490         default:
 1491                 db_printf("none\n");
 1492                 break;
 1493         }
 1494 }
 1495 
 1496 #endif
