FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_rwlock.c

    1 /*-
    2  * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
    3  *
    4  * Redistribution and use in source and binary forms, with or without
    5  * modification, are permitted provided that the following conditions
    6  * are met:
    7  * 1. Redistributions of source code must retain the above copyright
    8  *    notice, this list of conditions and the following disclaimer.
    9  * 2. Redistributions in binary form must reproduce the above copyright
   10  *    notice, this list of conditions and the following disclaimer in the
   11  *    documentation and/or other materials provided with the distribution.
   12  *
   13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   23  * SUCH DAMAGE.
   24  */
   25 
   26 /*
   27  * Machine independent bits of reader/writer lock implementation.
   28  */
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD$");
   32 
   33 #include "opt_ddb.h"
   34 #include "opt_hwpmc_hooks.h"
   35 #include "opt_no_adaptive_rwlocks.h"
   36 
   37 #include <sys/param.h>
   38 #include <sys/kdb.h>
   39 #include <sys/ktr.h>
   40 #include <sys/kernel.h>
   41 #include <sys/lock.h>
   42 #include <sys/mutex.h>
   43 #include <sys/proc.h>
   44 #include <sys/rwlock.h>
   45 #include <sys/sched.h>
   46 #include <sys/smp.h>
   47 #include <sys/sysctl.h>
   48 #include <sys/systm.h>
   49 #include <sys/turnstile.h>
   50 
   51 #include <machine/cpu.h>
   52 
   53 #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
   54 #define ADAPTIVE_RWLOCKS
   55 #endif
   56 
   57 #ifdef HWPMC_HOOKS
   58 #include <sys/pmckern.h>
   59 PMC_SOFT_DECLARE( , , lock, failed);
   60 #endif
   61 
   62 /*
   63  * Return the rwlock address when the lock cookie address is provided.
   64  * This functionality assumes that struct rwlock has a member named rw_lock.
   65  */
   66 #define rwlock2rw(c)    (__containerof(c, struct rwlock, rw_lock))
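
/*
 * A hedged illustration of the cookie convention above; the function name is
 * hypothetical and not part of this file.  The public rw_*() macros in
 * sys/rwlock.h hand &rw->rw_lock to the _rw_*_cookie() KPI, so rwlock2rw()
 * only has to map the member address back to its containing lock.
 */
static inline struct rwlock *
example_cookie_to_rwlock(struct rwlock *rw)
{
	volatile uintptr_t *cookie;

	/* The address the wrapper macros pass around as the lock cookie. */
	cookie = &rw->rw_lock;
	/* __containerof() recovers the enclosing struct rwlock. */
	return (rwlock2rw(cookie));
}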
   67 
   68 #ifdef DDB
   69 #include <ddb/ddb.h>
   70 
   71 static void     db_show_rwlock(const struct lock_object *lock);
   72 #endif
   73 static void     assert_rw(const struct lock_object *lock, int what);
   74 static void     lock_rw(struct lock_object *lock, uintptr_t how);
   75 #ifdef KDTRACE_HOOKS
   76 static int      owner_rw(const struct lock_object *lock, struct thread **owner);
   77 #endif
   78 static uintptr_t unlock_rw(struct lock_object *lock);
   79 
   80 struct lock_class lock_class_rw = {
   81         .lc_name = "rw",
   82         .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
   83         .lc_assert = assert_rw,
   84 #ifdef DDB
   85         .lc_ddb_show = db_show_rwlock,
   86 #endif
   87         .lc_lock = lock_rw,
   88         .lc_unlock = unlock_rw,
   89 #ifdef KDTRACE_HOOKS
   90         .lc_owner = owner_rw,
   91 #endif
   92 };
   93 
   94 #ifdef ADAPTIVE_RWLOCKS
   95 static int __read_frequently rowner_retries;
   96 static int __read_frequently rowner_loops;
   97 static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
   98     "rwlock debugging");
   99 SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
  100 SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
  101 
  102 static struct lock_delay_config __read_frequently rw_delay;
  103 
  104 SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_base, CTLFLAG_RW, &rw_delay.base,
  105     0, "");
  106 SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max,
  107     0, "");
  108 
  109 static void
  110 rw_lock_delay_init(void *arg __unused)
  111 {
  112 
  113         lock_delay_default_init(&rw_delay);
  114         rowner_retries = 10;
  115         rowner_loops = max(10000, rw_delay.max);
  116 }
  117 LOCK_DELAY_SYSINIT(rw_lock_delay_init);
  118 #endif
  119 
  120 /*
  121  * Return a pointer to the owning thread if the lock is write-locked or
  122  * NULL if the lock is unlocked or read-locked.
  123  */
  124 
  125 #define lv_rw_wowner(v)                                                 \
  126         ((v) & RW_LOCK_READ ? NULL :                                    \
  127          (struct thread *)RW_OWNER((v)))
  128 
  129 #define rw_wowner(rw)   lv_rw_wowner(RW_READ_VALUE(rw))
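
/*
 * A hedged sketch of the lock-word decoding done by the macros above; the
 * function name is hypothetical.  A read-locked or unlocked word has
 * RW_LOCK_READ set, so lv_rw_wowner() yields NULL; a write-locked word
 * carries the owning thread pointer (plus flag bits), so the owner can be
 * recovered from the value itself.
 */
static inline bool
example_write_locked(struct rwlock *rw)
{
	uintptr_t v;

	/* Snapshot the lock word once, then decode the snapshot. */
	v = RW_READ_VALUE(rw);
	return (lv_rw_wowner(v) != NULL);
}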
  130 
  131 /*
  132  * Return whether the write owner is recursed.  Write ownership is not
  133  * assured here and should be checked beforehand.
  134  */
  135 #define rw_recursed(rw)         ((rw)->rw_recurse != 0)
  136 
  137 /*
  138  * Return true if curthread holds the lock.
  139  */
  140 #define rw_wlocked(rw)          (rw_wowner((rw)) == curthread)
  141 
  142 /*
  143  * Return a pointer to the owning thread for this lock who should receive
  144  * any priority lent by threads that block on this lock.  Currently this
  145  * is identical to rw_wowner().
  146  */
  147 #define rw_owner(rw)            rw_wowner(rw)
  148 
  149 #ifndef INVARIANTS
  150 #define __rw_assert(c, what, file, line)
  151 #endif
  152 
  153 void
  154 assert_rw(const struct lock_object *lock, int what)
  155 {
  156 
  157         rw_assert((const struct rwlock *)lock, what);
  158 }
  159 
  160 void
  161 lock_rw(struct lock_object *lock, uintptr_t how)
  162 {
  163         struct rwlock *rw;
  164 
  165         rw = (struct rwlock *)lock;
  166         if (how)
  167                 rw_rlock(rw);
  168         else
  169                 rw_wlock(rw);
  170 }
  171 
  172 uintptr_t
  173 unlock_rw(struct lock_object *lock)
  174 {
  175         struct rwlock *rw;
  176 
  177         rw = (struct rwlock *)lock;
  178         rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
  179         if (rw->rw_lock & RW_LOCK_READ) {
  180                 rw_runlock(rw);
  181                 return (1);
  182         } else {
  183                 rw_wunlock(rw);
  184                 return (0);
  185         }
  186 }
  187 
  188 #ifdef KDTRACE_HOOKS
  189 int
  190 owner_rw(const struct lock_object *lock, struct thread **owner)
  191 {
  192         const struct rwlock *rw = (const struct rwlock *)lock;
  193         uintptr_t x = rw->rw_lock;
  194 
  195         *owner = rw_wowner(rw);
  196         return ((x & RW_LOCK_READ) != 0 ?  (RW_READERS(x) != 0) :
  197             (*owner != NULL));
  198 }
  199 #endif
  200 
  201 void
  202 _rw_init_flags(volatile uintptr_t *c, const char *name, int opts)
  203 {
  204         struct rwlock *rw;
  205         int flags;
  206 
  207         rw = rwlock2rw(c);
  208 
  209         MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
  210             RW_RECURSE | RW_NEW)) == 0);
  211         ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
  212             ("%s: rw_lock not aligned for %s: %p", __func__, name,
  213             &rw->rw_lock));
  214 
  215         flags = LO_UPGRADABLE;
  216         if (opts & RW_DUPOK)
  217                 flags |= LO_DUPOK;
  218         if (opts & RW_NOPROFILE)
  219                 flags |= LO_NOPROFILE;
  220         if (!(opts & RW_NOWITNESS))
  221                 flags |= LO_WITNESS;
  222         if (opts & RW_RECURSE)
  223                 flags |= LO_RECURSABLE;
  224         if (opts & RW_QUIET)
  225                 flags |= LO_QUIET;
  226         if (opts & RW_NEW)
  227                 flags |= LO_NEW;
  228 
  229         lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
  230         rw->rw_lock = RW_UNLOCKED;
  231         rw->rw_recurse = 0;
  232 }
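
/*
 * A minimal, hedged sketch of a consumer of the initialization above, written
 * against the public rwlock(9) macros that wrap the _rw_*() functions in this
 * file.  The lock, the protected variable and the helper functions are all
 * hypothetical.
 */
static struct rwlock example_data_lock;
static int example_data;

static void
example_data_init(void)
{

	/* rw_init() reaches _rw_init_flags() with opts == 0. */
	rw_init(&example_data_lock, "example data");
}

static int
example_data_read(void)
{
	int v;

	/* Readers can run concurrently with each other. */
	rw_rlock(&example_data_lock);
	v = example_data;
	rw_runlock(&example_data_lock);
	return (v);
}

static void
example_data_write(int v)
{

	/* Writers exclude both readers and other writers. */
	rw_wlock(&example_data_lock);
	example_data = v;
	rw_wunlock(&example_data_lock);
}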
  233 
  234 void
  235 _rw_destroy(volatile uintptr_t *c)
  236 {
  237         struct rwlock *rw;
  238 
  239         rw = rwlock2rw(c);
  240 
  241         KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
  242         KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
  243         rw->rw_lock = RW_DESTROYED;
  244         lock_destroy(&rw->lock_object);
  245 }
  246 
  247 void
  248 rw_sysinit(void *arg)
  249 {
  250         struct rw_args *args;
  251 
  252         args = arg;
  253         rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
  254             args->ra_flags);
  255 }
  256 
  257 int
  258 _rw_wowned(const volatile uintptr_t *c)
  259 {
  260 
  261         return (rw_wowner(rwlock2rw(c)) == curthread);
  262 }
  263 
  264 void
  265 _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
  266 {
  267         struct rwlock *rw;
  268         uintptr_t tid, v;
  269 
  270         rw = rwlock2rw(c);
  271 
  272         KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
  273             !TD_IS_IDLETHREAD(curthread),
  274             ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
  275             curthread, rw->lock_object.lo_name, file, line));
  276         KASSERT(rw->rw_lock != RW_DESTROYED,
  277             ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
  278         WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
  279             line, NULL);
  280         tid = (uintptr_t)curthread;
  281         v = RW_UNLOCKED;
  282         if (!_rw_write_lock_fetch(rw, &v, tid))
  283                 _rw_wlock_hard(rw, v, file, line);
  284         else
  285                 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw,
  286                     0, 0, file, line, LOCKSTAT_WRITER);
  287 
  288         LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
  289         WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
  290         TD_LOCKS_INC(curthread);
  291 }
  292 
  293 int
  294 __rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
  295 {
  296         struct thread *td;
  297         uintptr_t tid, v;
  298         int rval;
  299         bool recursed;
  300 
  301         td = curthread;
  302         tid = (uintptr_t)td;
  303         if (SCHEDULER_STOPPED_TD(td))
  304                 return (1);
  305 
  306         KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
  307             ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
  308             curthread, rw->lock_object.lo_name, file, line));
  309         KASSERT(rw->rw_lock != RW_DESTROYED,
  310             ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));
  311 
  312         rval = 1;
  313         recursed = false;
  314         v = RW_UNLOCKED;
  315         for (;;) {
  316                 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
  317                         break;
  318                 if (v == RW_UNLOCKED)
  319                         continue;
  320                 if (v == tid && (rw->lock_object.lo_flags & LO_RECURSABLE)) {
  321                         rw->rw_recurse++;
  322                         atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
  323                         break;
  324                 }
  325                 rval = 0;
  326                 break;
  327         }
  328 
  329         LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
  330         if (rval) {
  331                 WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
  332                     file, line);
  333                 if (!recursed)
  334                         LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
  335                             rw, 0, 0, file, line, LOCKSTAT_WRITER);
  336                 TD_LOCKS_INC(curthread);
  337         }
  338         return (rval);
  339 }
  340 
  341 int
  342 __rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
  343 {
  344         struct rwlock *rw;
  345 
  346         rw = rwlock2rw(c);
  347         return (__rw_try_wlock_int(rw LOCK_FILE_LINE_ARG));
  348 }
  349 
  350 void
  351 _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line)
  352 {
  353         struct rwlock *rw;
  354 
  355         rw = rwlock2rw(c);
  356 
  357         KASSERT(rw->rw_lock != RW_DESTROYED,
  358             ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
  359         __rw_assert(c, RA_WLOCKED, file, line);
  360         WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
  361         LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
  362             line);
  363 
  364 #ifdef LOCK_PROFILING
  365         _rw_wunlock_hard(rw, (uintptr_t)curthread, file, line);
  366 #else
  367         __rw_wunlock(rw, curthread, file, line);
  368 #endif
  369 
  370         TD_LOCKS_DEC(curthread);
  371 }
  372 
  373 /*
  374  * Determines whether a new reader can acquire the lock.  Succeeds if the
  375  * reader already owns a read lock and the lock is locked for read, to
  376  * prevent deadlock from reader recursion.  Also succeeds if the lock is
  377  * unlocked or read-locked and has no write waiters or spinners.  Failing
  378  * otherwise prioritizes writers over readers.
  379  */
  380 static bool __always_inline
  381 __rw_can_read(struct thread *td, uintptr_t v, bool fp)
  382 {
  383 
  384         if ((v & (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER))
  385             == RW_LOCK_READ)
  386                 return (true);
  387         if (!fp && td->td_rw_rlocks && (v & RW_LOCK_READ))
  388                 return (true);
  389         return (false);
  390 }
  391 
  392 static bool __always_inline
  393 __rw_rlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp, bool fp
  394     LOCK_FILE_LINE_ARG_DEF)
  395 {
  396 
  397         /*
  398          * Handle the easy case.  If no other thread has a write
  399          * lock, then try to bump up the count of read locks.  Note
  400          * that we have to preserve the current state of the
  401          * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
  402          * read lock, then rw_lock must have changed, so restart
  403          * the loop.  Note that this handles the case of a
  404          * completely unlocked rwlock since such a lock is encoded
  405          * as a read lock with no waiters.
  406          */
  407         while (__rw_can_read(td, *vp, fp)) {
  408                 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, vp,
  409                         *vp + RW_ONE_READER)) {
  410                         if (LOCK_LOG_TEST(&rw->lock_object, 0))
  411                                 CTR4(KTR_LOCK,
  412                                     "%s: %p succeed %p -> %p", __func__,
  413                                     rw, (void *)*vp,
  414                                     (void *)(*vp + RW_ONE_READER));
  415                         td->td_rw_rlocks++;
  416                         return (true);
  417                 }
  418         }
  419         return (false);
  420 }
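
/*
 * A hedged note on the encoding relied on above; the helper name is
 * hypothetical.  sys/rwlock.h defines RW_UNLOCKED as RW_READERS_LOCK(0), so
 * an unlocked word already has RW_LOCK_READ set with a reader count of zero
 * and the first reader only needs to add RW_ONE_READER.
 */
static inline void
example_check_reader_encoding(void)
{

	/* Unlocked plus one reader is exactly the single-reader word. */
	MPASS(RW_UNLOCKED + RW_ONE_READER == RW_READERS_LOCK(1));
}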
  421 
  422 static void __noinline
  423 __rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
  424     LOCK_FILE_LINE_ARG_DEF)
  425 {
  426         struct turnstile *ts;
  427         struct thread *owner;
  428 #ifdef ADAPTIVE_RWLOCKS
  429         int spintries = 0;
  430         int i, n;
  431 #endif
  432 #ifdef LOCK_PROFILING
  433         uint64_t waittime = 0;
  434         int contested = 0;
  435 #endif
  436 #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
  437         struct lock_delay_arg lda;
  438 #endif
  439 #ifdef KDTRACE_HOOKS
  440         u_int sleep_cnt = 0;
  441         int64_t sleep_time = 0;
  442         int64_t all_time = 0;
  443 #endif
  444 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
  445         uintptr_t state;
  446         int doing_lockprof = 0;
  447 #endif
  448 
  449 #ifdef KDTRACE_HOOKS
  450         if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
  451                 if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
  452                         goto out_lockstat;
  453                 doing_lockprof = 1;
  454                 all_time -= lockstat_nsecs(&rw->lock_object);
  455                 state = v;
  456         }
  457 #endif
  458 #ifdef LOCK_PROFILING
  459         doing_lockprof = 1;
  460         state = v;
  461 #endif
  462 
  463         if (SCHEDULER_STOPPED())
  464                 return;
  465 
  466 #if defined(ADAPTIVE_RWLOCKS)
  467         lock_delay_arg_init(&lda, &rw_delay);
  468 #elif defined(KDTRACE_HOOKS)
  469         lock_delay_arg_init(&lda, NULL);
  470 #endif
  471 
  472 #ifdef HWPMC_HOOKS
  473         PMC_SOFT_CALL( , , lock, failed);
  474 #endif
  475         lock_profile_obtain_lock_failed(&rw->lock_object,
  476             &contested, &waittime);
  477 
  478         for (;;) {
  479                 if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
  480                         break;
  481 #ifdef KDTRACE_HOOKS
  482                 lda.spin_cnt++;
  483 #endif
  484 
  485 #ifdef ADAPTIVE_RWLOCKS
  486                 /*
  487                  * If the owner is running on another CPU, spin until
  488                  * the owner stops running or the state of the lock
  489                  * changes.
  490                  */
  491                 if ((v & RW_LOCK_READ) == 0) {
  492                         owner = (struct thread *)RW_OWNER(v);
  493                         if (TD_IS_RUNNING(owner)) {
  494                                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  495                                         CTR3(KTR_LOCK,
  496                                             "%s: spinning on %p held by %p",
  497                                             __func__, rw, owner);
  498                                 KTR_STATE1(KTR_SCHED, "thread",
  499                                     sched_tdname(curthread), "spinning",
  500                                     "lockname:\"%s\"", rw->lock_object.lo_name);
  501                                 do {
  502                                         lock_delay(&lda);
  503                                         v = RW_READ_VALUE(rw);
  504                                         owner = lv_rw_wowner(v);
  505                                 } while (owner != NULL && TD_IS_RUNNING(owner));
  506                                 KTR_STATE0(KTR_SCHED, "thread",
  507                                     sched_tdname(curthread), "running");
  508                                 continue;
  509                         }
  510                 } else if (spintries < rowner_retries) {
  511                         spintries++;
  512                         KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
  513                             "spinning", "lockname:\"%s\"",
  514                             rw->lock_object.lo_name);
  515                         for (i = 0; i < rowner_loops; i += n) {
  516                                 n = RW_READERS(v);
  517                                 lock_delay_spin(n);
  518                                 v = RW_READ_VALUE(rw);
  519                                 if ((v & RW_LOCK_READ) == 0 || __rw_can_read(td, v, false))
  520                                         break;
  521                         }
  522 #ifdef KDTRACE_HOOKS
  523                         lda.spin_cnt += rowner_loops - i;
  524 #endif
  525                         KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
  526                             "running");
  527                         if (i < rowner_loops)
  528                                 continue;
  529                 }
  530 #endif
  531 
  532                 /*
  533                  * Okay, now it's the hard case.  Some other thread
  534                  * already has a write lock or there are write waiters
  535                  * present; acquire the turnstile lock so we can begin
  536                  * the process of blocking.
  537                  */
  538                 ts = turnstile_trywait(&rw->lock_object);
  539 
  540                 /*
  541                  * The lock might have been released while we spun, so
  542                  * recheck its state and restart the loop if needed.
  543                  */
  544                 v = RW_READ_VALUE(rw);
  545 retry_ts:
  546                 if (__rw_can_read(td, v, false)) {
  547                         turnstile_cancel(ts);
  548                         continue;
  549                 }
  550 
  551                 owner = lv_rw_wowner(v);
  552 
  553 #ifdef ADAPTIVE_RWLOCKS
  554                 /*
  555                  * The current lock owner might have started executing
  556                  * on another CPU (or the lock could have changed
  557                  * owners) while we were waiting on the turnstile
  558                  * chain lock.  If so, drop the turnstile lock and try
  559                  * again.
  560                  */
  561                 if (owner != NULL) {
  562                         if (TD_IS_RUNNING(owner)) {
  563                                 turnstile_cancel(ts);
  564                                 continue;
  565                         }
  566                 }
  567 #endif
  568 
  569                 /*
  570                  * The lock is held in write mode or it already has waiters.
  571                  */
  572                 MPASS(!__rw_can_read(td, v, false));
  573 
  574                 /*
  575                  * If the RW_LOCK_READ_WAITERS flag is already set, then
  576                  * we can go ahead and block.  If it is not set then try
  577                  * to set it.  If we fail to set it, drop the turnstile
  578                  * lock and restart the loop.
  579                  */
  580                 if (!(v & RW_LOCK_READ_WAITERS)) {
  581                         if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
  582                             v | RW_LOCK_READ_WAITERS))
  583                                 goto retry_ts;
  584                         if (LOCK_LOG_TEST(&rw->lock_object, 0))
  585                                 CTR2(KTR_LOCK, "%s: %p set read waiters flag",
  586                                     __func__, rw);
  587                 }
  588 
  589                 /*
  590                  * We were unable to acquire the lock and the read waiters
  591                  * flag is set, so we must block on the turnstile.
  592                  */
  593                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  594                         CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
  595                             rw);
  596 #ifdef KDTRACE_HOOKS
  597                 sleep_time -= lockstat_nsecs(&rw->lock_object);
  598 #endif
  599                 MPASS(owner == rw_owner(rw));
  600                 turnstile_wait(ts, owner, TS_SHARED_QUEUE);
  601 #ifdef KDTRACE_HOOKS
  602                 sleep_time += lockstat_nsecs(&rw->lock_object);
  603                 sleep_cnt++;
  604 #endif
  605                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  606                         CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
  607                             __func__, rw);
  608                 v = RW_READ_VALUE(rw);
  609         }
  610 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
  611         if (__predict_true(!doing_lockprof))
  612                 return;
  613 #endif
  614 #ifdef KDTRACE_HOOKS
  615         all_time += lockstat_nsecs(&rw->lock_object);
  616         if (sleep_time)
  617                 LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
  618                     LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
  619                     (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
  620 
  621         /* Record only the loops spinning and not sleeping. */
  622         if (lda.spin_cnt > sleep_cnt)
  623                 LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
  624                     LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
  625                     (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
  626 out_lockstat:
  627 #endif
  628         /*
  629          * TODO: acquire "owner of record" here.  Here be turnstile dragons
  630          * however.  turnstiles don't like owners changing between calls to
  631          * turnstile_wait() currently.
  632          */
  633         LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
  634             waittime, file, line, LOCKSTAT_READER);
  635 }
  636 
  637 void
  638 __rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
  639 {
  640         struct thread *td;
  641         uintptr_t v;
  642 
  643         td = curthread;
  644 
  645         KASSERT(kdb_active != 0 || SCHEDULER_STOPPED_TD(td) ||
  646             !TD_IS_IDLETHREAD(td),
  647             ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
  648             td, rw->lock_object.lo_name, file, line));
  649         KASSERT(rw->rw_lock != RW_DESTROYED,
  650             ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
  651         KASSERT(rw_wowner(rw) != td,
  652             ("rw_rlock: wlock already held for %s @ %s:%d",
  653             rw->lock_object.lo_name, file, line));
  654         WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);
  655 
  656         v = RW_READ_VALUE(rw);
  657         if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) ||
  658             !__rw_rlock_try(rw, td, &v, true LOCK_FILE_LINE_ARG)))
  659                 __rw_rlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
  660         else
  661                 lock_profile_obtain_lock_success(&rw->lock_object, 0, 0,
  662                     file, line);
  663 
  664         LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
  665         WITNESS_LOCK(&rw->lock_object, 0, file, line);
  666         TD_LOCKS_INC(curthread);
  667 }
  668 
  669 void
  670 __rw_rlock(volatile uintptr_t *c, const char *file, int line)
  671 {
  672         struct rwlock *rw;
  673 
  674         rw = rwlock2rw(c);
  675         __rw_rlock_int(rw LOCK_FILE_LINE_ARG);
  676 }
  677 
  678 int
  679 __rw_try_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
  680 {
  681         uintptr_t x;
  682 
  683         if (SCHEDULER_STOPPED())
  684                 return (1);
  685 
  686         KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
  687             ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
  688             curthread, rw->lock_object.lo_name, file, line));
  689 
  690         x = rw->rw_lock;
  691         for (;;) {
  692                 KASSERT(rw->rw_lock != RW_DESTROYED,
  693                     ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
  694                 if (!(x & RW_LOCK_READ))
  695                         break;
  696                 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &x, x + RW_ONE_READER)) {
  697                         LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
  698                             line);
  699                         WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
  700                         LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
  701                             rw, 0, 0, file, line, LOCKSTAT_READER);
  702                         TD_LOCKS_INC(curthread);
  703                         curthread->td_rw_rlocks++;
  704                         return (1);
  705                 }
  706         }
  707 
  708         LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
  709         return (0);
  710 }
  711 
  712 int
  713 __rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
  714 {
  715         struct rwlock *rw;
  716 
  717         rw = rwlock2rw(c);
  718         return (__rw_try_rlock_int(rw LOCK_FILE_LINE_ARG));
  719 }
  720 
  721 static bool __always_inline
  722 __rw_runlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp)
  723 {
  724 
  725         for (;;) {
  726                 /*
  727                  * See if there is more than one read lock held.  If so,
  728                  * just drop one and return.
  729                  */
  730                 if (RW_READERS(*vp) > 1) {
  731                         if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
  732                             *vp - RW_ONE_READER)) {
  733                                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  734                                         CTR4(KTR_LOCK,
  735                                             "%s: %p succeeded %p -> %p",
  736                                             __func__, rw, (void *)*vp,
  737                                             (void *)(*vp - RW_ONE_READER));
  738                                 td->td_rw_rlocks--;
  739                                 return (true);
  740                         }
  741                         continue;
  742                 }
  743                 /*
  744                  * If there aren't any waiters for a write lock, then try
  745                  * to drop it quickly.
  746                  */
  747                 if (!(*vp & RW_LOCK_WAITERS)) {
  748                         MPASS((*vp & ~RW_LOCK_WRITE_SPINNER) ==
  749                             RW_READERS_LOCK(1));
  750                         if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
  751                             RW_UNLOCKED)) {
  752                                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  753                                         CTR2(KTR_LOCK, "%s: %p last succeeded",
  754                                             __func__, rw);
  755                                 td->td_rw_rlocks--;
  756                                 return (true);
  757                         }
  758                         continue;
  759                 }
  760                 break;
  761         }
  762         return (false);
  763 }
  764 
  765 static void __noinline
  766 __rw_runlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
  767     LOCK_FILE_LINE_ARG_DEF)
  768 {
  769         struct turnstile *ts;
  770         uintptr_t setv, queue;
  771 
  772         if (SCHEDULER_STOPPED())
  773                 return;
  774 
  775         if (__rw_runlock_try(rw, td, &v))
  776                 goto out_lockstat;
  777 
  778         /*
  779          * Ok, we know we have waiters and we think we are the
  780          * last reader, so grab the turnstile lock.
  781          */
  782         turnstile_chain_lock(&rw->lock_object);
  783         v = RW_READ_VALUE(rw);
  784         for (;;) {
  785                 if (__rw_runlock_try(rw, td, &v))
  786                         break;
  787 
  788                 v &= (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
  789                 MPASS(v & RW_LOCK_WAITERS);
  790 
  791                 /*
  792                  * Try to drop our lock leaving the lock in an unlocked
  793                  * state.
  794                  *
  795                  * If you wanted to do explicit lock handoff you'd have to
  796                  * do it here.  You'd also want to use turnstile_signal()
  797                  * and you'd have to handle the race where a higher
  798                  * priority thread blocks on the write lock before the
  799                  * thread you wake up actually runs and have the new thread
  800                  * "steal" the lock.  For now it's a lot simpler to just
  801                  * wakeup all of the waiters.
  802                  *
  803                  * As above, if we fail, then another thread might have
  804                  * acquired a read lock, so drop the turnstile lock and
  805                  * restart.
  806                  */
  807                 setv = RW_UNLOCKED;
  808                 queue = TS_SHARED_QUEUE;
  809                 if (v & RW_LOCK_WRITE_WAITERS) {
  810                         queue = TS_EXCLUSIVE_QUEUE;
  811                         setv |= (v & RW_LOCK_READ_WAITERS);
  812                 }
  813                 v |= RW_READERS_LOCK(1);
  814                 if (!atomic_fcmpset_rel_ptr(&rw->rw_lock, &v, setv))
  815                         continue;
  816                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  817                         CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
  818                             __func__, rw);
  819 
  820                 /*
  821                  * Ok.  The lock is released and all that's left is to
  822                  * wake up the waiters.  Note that the lock might not be
  823                  * free anymore, but in that case the writers will just
  824                  * block again if they run before the new lock holder(s)
  825                  * release the lock.
  826                  */
  827                 ts = turnstile_lookup(&rw->lock_object);
  828                 MPASS(ts != NULL);
  829                 turnstile_broadcast(ts, queue);
  830                 turnstile_unpend(ts, TS_SHARED_LOCK);
  831                 td->td_rw_rlocks--;
  832                 break;
  833         }
  834         turnstile_chain_unlock(&rw->lock_object);
  835 out_lockstat:
  836         LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER);
  837 }
  838 
  839 void
  840 _rw_runlock_cookie_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
  841 {
  842         struct thread *td;
  843         uintptr_t v;
  844 
  845         KASSERT(rw->rw_lock != RW_DESTROYED,
  846             ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
  847         __rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);
  848         WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
  849         LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);
  850 
  851         td = curthread;
  852         v = RW_READ_VALUE(rw);
  853 
  854         if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) ||
  855             !__rw_runlock_try(rw, td, &v)))
  856                 __rw_runlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
  857         else
  858                 lock_profile_release_lock(&rw->lock_object);
  859 
  860         TD_LOCKS_DEC(curthread);
  861 }
  862 
  863 void
  864 _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
  865 {
  866         struct rwlock *rw;
  867 
  868         rw = rwlock2rw(c);
  869         _rw_runlock_cookie_int(rw LOCK_FILE_LINE_ARG);
  870 }
  871 
  872 /*
  873  * This function is called when we are unable to obtain a write lock on the
  874  * first try.  This means that at least one other thread holds either a
  875  * read or write lock.
  876  */
  877 void
  878 __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
  879 {
  880         uintptr_t tid;
  881         struct rwlock *rw;
  882         struct turnstile *ts;
  883         struct thread *owner;
  884 #ifdef ADAPTIVE_RWLOCKS
  885         int spintries = 0;
  886         int i, n;
  887         enum { READERS, WRITER } sleep_reason = READERS;
  888 #endif
  889         uintptr_t x;
  890 #ifdef LOCK_PROFILING
  891         uint64_t waittime = 0;
  892         int contested = 0;
  893 #endif
  894 #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
  895         struct lock_delay_arg lda;
  896 #endif
  897 #ifdef KDTRACE_HOOKS
  898         u_int sleep_cnt = 0;
  899         int64_t sleep_time = 0;
  900         int64_t all_time = 0;
  901 #endif
  902 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
  903         uintptr_t state;
  904         int doing_lockprof = 0;
  905 #endif
  906 
  907         tid = (uintptr_t)curthread;
  908         rw = rwlock2rw(c);
  909 
  910 #ifdef KDTRACE_HOOKS
  911         if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
  912                 while (v == RW_UNLOCKED) {
  913                         if (_rw_write_lock_fetch(rw, &v, tid))
  914                                 goto out_lockstat;
  915                 }
  916                 doing_lockprof = 1;
  917                 all_time -= lockstat_nsecs(&rw->lock_object);
  918                 state = v;
  919         }
  920 #endif
  921 #ifdef LOCK_PROFILING
  922         doing_lockprof = 1;
  923         state = v;
  924 #endif
  925 
  926         if (SCHEDULER_STOPPED())
  927                 return;
  928 
  929 #if defined(ADAPTIVE_RWLOCKS)
  930         lock_delay_arg_init(&lda, &rw_delay);
  931 #elif defined(KDTRACE_HOOKS)
  932         lock_delay_arg_init(&lda, NULL);
  933 #endif
  934         if (__predict_false(v == RW_UNLOCKED))
  935                 v = RW_READ_VALUE(rw);
  936 
  937         if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) {
  938                 KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
  939                     ("%s: recursing but non-recursive rw %s @ %s:%d\n",
  940                     __func__, rw->lock_object.lo_name, file, line));
  941                 rw->rw_recurse++;
  942                 atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
  943                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  944                         CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
  945                 return;
  946         }
  947 
  948         if (LOCK_LOG_TEST(&rw->lock_object, 0))
  949                 CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
  950                     rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
  951 
  952 #ifdef HWPMC_HOOKS
  953         PMC_SOFT_CALL( , , lock, failed);
  954 #endif
  955         lock_profile_obtain_lock_failed(&rw->lock_object,
  956             &contested, &waittime);
  957 
  958         for (;;) {
  959                 if (v == RW_UNLOCKED) {
  960                         if (_rw_write_lock_fetch(rw, &v, tid))
  961                                 break;
  962                         continue;
  963                 }
  964 #ifdef KDTRACE_HOOKS
  965                 lda.spin_cnt++;
  966 #endif
  967 
  968 #ifdef ADAPTIVE_RWLOCKS
  969                 /*
  970                  * If the lock is write locked and the owner is
  971                  * running on another CPU, spin until the owner stops
  972                  * running or the state of the lock changes.
  973                  */
  974                 if (!(v & RW_LOCK_READ)) {
  975                         sleep_reason = WRITER;
  976                         owner = lv_rw_wowner(v);
  977                         if (!TD_IS_RUNNING(owner))
  978                                 goto ts;
  979                         if (LOCK_LOG_TEST(&rw->lock_object, 0))
  980                                 CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
  981                                     __func__, rw, owner);
  982                         KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
  983                             "spinning", "lockname:\"%s\"",
  984                             rw->lock_object.lo_name);
  985                         do {
  986                                 lock_delay(&lda);
  987                                 v = RW_READ_VALUE(rw);
  988                                 owner = lv_rw_wowner(v);
  989                         } while (owner != NULL && TD_IS_RUNNING(owner));
  990                         KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
  991                             "running");
  992                         continue;
  993                 } else if (RW_READERS(v) > 0) {
  994                         sleep_reason = READERS;
  995                         if (spintries == rowner_retries)
  996                                 goto ts;
  997                         if (!(v & RW_LOCK_WRITE_SPINNER)) {
  998                                 if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
  999                                     v | RW_LOCK_WRITE_SPINNER)) {
 1000                                         continue;
 1001                                 }
 1002                         }
 1003                         spintries++;
 1004                         KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 1005                             "spinning", "lockname:\"%s\"",
 1006                             rw->lock_object.lo_name);
 1007                         for (i = 0; i < rowner_loops; i += n) {
 1008                                 n = RW_READERS(v);
 1009                                 lock_delay_spin(n);
 1010                                 v = RW_READ_VALUE(rw);
 1011                                 if ((v & RW_LOCK_WRITE_SPINNER) == 0)
 1012                                         break;
 1013                         }
 1014 #ifdef KDTRACE_HOOKS
 1015                         lda.spin_cnt += i;
 1016 #endif
 1017                         KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
 1018                             "running");
 1019                         if (i < rowner_loops)
 1020                                 continue;
 1021                 }
 1022 ts:
 1023 #endif
 1024                 ts = turnstile_trywait(&rw->lock_object);
 1025                 v = RW_READ_VALUE(rw);
 1026 retry_ts:
 1027                 owner = lv_rw_wowner(v);
 1028 
 1029 #ifdef ADAPTIVE_RWLOCKS
 1030                 /*
 1031                  * The current lock owner might have started executing
 1032                  * on another CPU (or the lock could have changed
 1033                  * owners) while we were waiting on the turnstile
 1034                  * chain lock.  If so, drop the turnstile lock and try
 1035                  * again.
 1036                  */
 1037                 if (owner != NULL) {
 1038                         if (TD_IS_RUNNING(owner)) {
 1039                                 turnstile_cancel(ts);
 1040                                 continue;
 1041                         }
 1042                 } else if (RW_READERS(v) > 0 && sleep_reason == WRITER) {
 1043                         turnstile_cancel(ts);
 1044                         continue;
 1045                 }
 1046 #endif
 1047                 /*
 1048                  * Check the waiters flags of this rwlock.  If the lock
 1049                  * was released without leaving any pending waiters
 1050                  * queue, simply try to acquire it.  If a pending
 1051                  * waiters queue is present, claim lock ownership and
 1052                  * preserve the pending queue.
 1053                  */
 1054                 x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
 1055                 if ((v & ~x) == RW_UNLOCKED) {
 1056                         x &= ~RW_LOCK_WRITE_SPINNER;
 1057                         if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid | x)) {
 1058                                 if (x)
 1059                                         turnstile_claim(ts);
 1060                                 else
 1061                                         turnstile_cancel(ts);
 1062                                 break;
 1063                         }
 1064                         goto retry_ts;
 1065                 }
 1066                 /*
 1067                  * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
 1068                  * set it.  If we fail to set it, then loop back and try
 1069                  * again.
 1070                  */
 1071                 if (!(v & RW_LOCK_WRITE_WAITERS)) {
 1072                         if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
 1073                             v | RW_LOCK_WRITE_WAITERS))
 1074                                 goto retry_ts;
 1075                         if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1076                                 CTR2(KTR_LOCK, "%s: %p set write waiters flag",
 1077                                     __func__, rw);
 1078                 }
 1079                 /*
 1080                  * We were unable to acquire the lock and the write waiters
 1081                  * flag is set, so we must block on the turnstile.
 1082                  */
 1083                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1084                         CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
 1085                             rw);
 1086 #ifdef KDTRACE_HOOKS
 1087                 sleep_time -= lockstat_nsecs(&rw->lock_object);
 1088 #endif
 1089                 MPASS(owner == rw_owner(rw));
 1090                 turnstile_wait(ts, owner, TS_EXCLUSIVE_QUEUE);
 1091 #ifdef KDTRACE_HOOKS
 1092                 sleep_time += lockstat_nsecs(&rw->lock_object);
 1093                 sleep_cnt++;
 1094 #endif
 1095                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1096                         CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
 1097                             __func__, rw);
 1098 #ifdef ADAPTIVE_RWLOCKS
 1099                 spintries = 0;
 1100 #endif
 1101                 v = RW_READ_VALUE(rw);
 1102         }
 1103 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 1104         if (__predict_true(!doing_lockprof))
 1105                 return;
 1106 #endif
 1107 #ifdef KDTRACE_HOOKS
 1108         all_time += lockstat_nsecs(&rw->lock_object);
 1109         if (sleep_time)
 1110                 LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
 1111                     LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
 1112                     (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 1113 
 1114         /* Record only the loops spinning and not sleeping. */
 1115         if (lda.spin_cnt > sleep_cnt)
 1116                 LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
 1117                     LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
 1118                     (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 1119 out_lockstat:
 1120 #endif
 1121         LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
 1122             waittime, file, line, LOCKSTAT_WRITER);
 1123 }
 1124 
 1125 /*
 1126  * This function is called if lockstat is active or the first try at releasing
 1127  * a write lock failed.  The latter means that the lock is either recursed or
 1128  * has at least one of the two waiter bits set, indicating that at least one
 1129  * thread is waiting on this lock.
 1130  */
 1131 void
 1132 __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
 1133 {
 1134         struct rwlock *rw;
 1135         struct turnstile *ts;
 1136         uintptr_t tid, setv;
 1137         int queue;
 1138 
 1139         tid = (uintptr_t)curthread;
 1140         if (SCHEDULER_STOPPED())
 1141                 return;
 1142 
 1143         rw = rwlock2rw(c);
 1144         if (__predict_false(v == tid))
 1145                 v = RW_READ_VALUE(rw);
 1146 
 1147         if (v & RW_LOCK_WRITER_RECURSED) {
 1148                 if (--(rw->rw_recurse) == 0)
 1149                         atomic_clear_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
 1150                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1151                         CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
 1152                 return;
 1153         }
 1154 
 1155         LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_WRITER);
 1156         if (v == tid && _rw_write_unlock(rw, tid))
 1157                 return;
 1158 
 1159         KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
 1160             ("%s: neither of the waiter flags are set", __func__));
 1161 
 1162         if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1163                 CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);
 1164 
 1165         turnstile_chain_lock(&rw->lock_object);
 1166 
 1167         /*
 1168          * Use the same wake-up algorithm as sx locks for now: prefer waking up
 1169          * write waiters, if there are any, over read waiters.  This is probably
 1170          * not ideal.
 1171          *
 1172          * 'setv' is the value we are going to write back to rw_lock.  If we
 1173          * have waiters on both queues, we need to preserve the state of the
 1174          * waiter flag for the queue we don't wake up.  For now this is
 1175          * hardcoded for the algorithm described above.
 1176          *
 1177          * In the case of both readers and writers waiting, we wake up the
 1178          * writers and leave the RW_LOCK_READ_WAITERS flag set.  There is
 1179          * probably a potential priority inversion in here that could be
 1180          * worked around either by waking both queues of waiters or doing
 1181          * some complicated lock handoff gymnastics.
 1182          */
 1183         setv = RW_UNLOCKED;
 1184         v = RW_READ_VALUE(rw);
 1185         queue = TS_SHARED_QUEUE;
 1186         if (v & RW_LOCK_WRITE_WAITERS) {
 1187                 queue = TS_EXCLUSIVE_QUEUE;
 1188                 setv |= (v & RW_LOCK_READ_WAITERS);
 1189         }
 1190         atomic_store_rel_ptr(&rw->rw_lock, setv);
 1191 
 1192         /* Wake up all waiters for the specific queue. */
 1193         if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1194                 CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
 1195                     queue == TS_SHARED_QUEUE ? "read" : "write");
 1196 
 1197         ts = turnstile_lookup(&rw->lock_object);
 1198         MPASS(ts != NULL);
 1199         turnstile_broadcast(ts, queue);
 1200         turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
 1201         turnstile_chain_unlock(&rw->lock_object);
 1202 }
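
/*
 * A hedged restatement of the wake-up selection above as a standalone helper;
 * the name is hypothetical.  Given the waiter bits of the lock word, it
 * returns the value stored back into rw_lock and reports which turnstile
 * queue gets woken.
 */
static inline uintptr_t
example_wunlock_wakeup(uintptr_t v, int *queuep)
{
	uintptr_t setv;

	setv = RW_UNLOCKED;
	*queuep = TS_SHARED_QUEUE;
	if (v & RW_LOCK_WRITE_WAITERS) {
		/* Writers win; remember that readers are still queued. */
		*queuep = TS_EXCLUSIVE_QUEUE;
		setv |= (v & RW_LOCK_READ_WAITERS);
	}
	return (setv);
}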
 1203 
 1204 /*
 1205  * Attempt to do a non-blocking upgrade from a read lock to a write
 1206  * lock.  This will only succeed if this thread holds a single read
 1207  * lock.  Returns true if the upgrade succeeded and false otherwise.
 1208  */
 1209 int
 1210 __rw_try_upgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 1211 {
 1212         uintptr_t v, setv, tid;
 1213         struct turnstile *ts;
 1214         int success;
 1215 
 1216         if (SCHEDULER_STOPPED())
 1217                 return (1);
 1218 
 1219         KASSERT(rw->rw_lock != RW_DESTROYED,
 1220             ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
 1221         __rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);
 1222 
 1223         /*
 1224          * Attempt to switch from one reader to a writer.  If there
 1225          * are any write waiters, then we will have to lock the
 1226          * turnstile first to prevent races with another writer
 1227          * calling turnstile_wait() before we have claimed this
 1228          * turnstile.  So, do the simple case of no waiters first.
 1229          */
 1230         tid = (uintptr_t)curthread;
 1231         success = 0;
 1232         v = RW_READ_VALUE(rw);
 1233         for (;;) {
 1234                 if (RW_READERS(v) > 1)
 1235                         break;
 1236                 if (!(v & RW_LOCK_WAITERS)) {
 1237                         success = atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid);
 1238                         if (!success)
 1239                                 continue;
 1240                         break;
 1241                 }
 1242 
 1243                 /*
 1244                  * Ok, we think we have waiters, so lock the turnstile.
 1245                  */
 1246                 ts = turnstile_trywait(&rw->lock_object);
 1247                 v = RW_READ_VALUE(rw);
 1248 retry_ts:
 1249                 if (RW_READERS(v) > 1) {
 1250                         turnstile_cancel(ts);
 1251                         break;
 1252                 }
 1253                 /*
 1254                  * Try to switch from one reader to a writer again.  This time
 1255                  * we honor the current state of the waiters flags.
 1256                  * If we obtain the lock with the flags set, then claim
 1257                  * ownership of the turnstile.
 1258                  */
 1259                 setv = tid | (v & RW_LOCK_WAITERS);
 1260                 success = atomic_fcmpset_ptr(&rw->rw_lock, &v, setv);
 1261                 if (success) {
 1262                         if (v & RW_LOCK_WAITERS)
 1263                                 turnstile_claim(ts);
 1264                         else
 1265                                 turnstile_cancel(ts);
 1266                         break;
 1267                 }
 1268                 goto retry_ts;
 1269         }
 1270         LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
 1271         if (success) {
 1272                 curthread->td_rw_rlocks--;
 1273                 WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 1274                     file, line);
 1275                 LOCKSTAT_RECORD0(rw__upgrade, rw);
 1276         }
 1277         return (success);
 1278 }
 1279 
 1280 int
 1281 __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line)
 1282 {
 1283         struct rwlock *rw;
 1284 
 1285         rw = rwlock2rw(c);
 1286         return (__rw_try_upgrade_int(rw LOCK_FILE_LINE_ARG));
 1287 }
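
/*
 * A hedged sketch of the usual consumer-side upgrade pattern built on the
 * public rwlock(9) macros; the lock and the condition being rechecked are
 * hypothetical.  If the non-blocking upgrade fails (typically because other
 * readers still hold the lock), the fallback is to drop the read lock, take
 * the write lock and re-check, since the state may have changed in between.
 */
static struct rwlock example_upgrade_lock;
static int example_counter;

static void
example_bump_if_positive(void)
{

	rw_rlock(&example_upgrade_lock);
	if (example_counter <= 0) {
		rw_runlock(&example_upgrade_lock);
		return;
	}
	if (!rw_try_upgrade(&example_upgrade_lock)) {
		/* Fall back to a full write lock and re-check. */
		rw_runlock(&example_upgrade_lock);
		rw_wlock(&example_upgrade_lock);
		if (example_counter <= 0) {
			rw_wunlock(&example_upgrade_lock);
			return;
		}
	}
	/* Reached with the write lock held, via either path. */
	example_counter++;
	rw_wunlock(&example_upgrade_lock);
}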
 1288 
 1289 /*
 1290  * Downgrade a write lock into a single read lock.
 1291  */
 1292 void
 1293 __rw_downgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 1294 {
 1295         struct turnstile *ts;
 1296         uintptr_t tid, v;
 1297         int rwait, wwait;
 1298 
 1299         if (SCHEDULER_STOPPED())
 1300                 return;
 1301 
 1302         KASSERT(rw->rw_lock != RW_DESTROYED,
 1303             ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
 1304         __rw_assert(&rw->rw_lock, RA_WLOCKED | RA_NOTRECURSED, file, line);
 1305 #ifndef INVARIANTS
 1306         if (rw_recursed(rw))
 1307                 panic("downgrade of a recursed lock");
 1308 #endif
 1309 
 1310         WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);
 1311 
 1312         /*
 1313          * Convert from a writer to a single reader.  First we handle
 1314          * the easy case with no waiters.  If there are any waiters, we
 1315          * lock the turnstile and "disown" the lock.
 1316          */
 1317         tid = (uintptr_t)curthread;
 1318         if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
 1319                 goto out;
 1320 
 1321         /*
 1322          * Ok, we think we have waiters, so lock the turnstile so we can
 1323          * read the waiter flags without any races.
 1324          */
 1325         turnstile_chain_lock(&rw->lock_object);
 1326         v = rw->rw_lock & RW_LOCK_WAITERS;
 1327         rwait = v & RW_LOCK_READ_WAITERS;
 1328         wwait = v & RW_LOCK_WRITE_WAITERS;
 1329         MPASS(rwait | wwait);
 1330 
 1331         /*
 1332          * Downgrade from a write lock while preserving the waiters flag
 1333          * and give up ownership of the turnstile.
 1334          */
 1335         ts = turnstile_lookup(&rw->lock_object);
 1336         MPASS(ts != NULL);
 1337         if (!wwait)
 1338                 v &= ~RW_LOCK_READ_WAITERS;
 1339         atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
 1340         /*
 1341          * Wake other readers if there are no writers pending.  Otherwise they
 1342          * won't be able to acquire the lock anyway.
 1343          */
 1344         if (rwait && !wwait) {
 1345                 turnstile_broadcast(ts, TS_SHARED_QUEUE);
 1346                 turnstile_unpend(ts, TS_EXCLUSIVE_LOCK);
 1347         } else
 1348                 turnstile_disown(ts);
 1349         turnstile_chain_unlock(&rw->lock_object);
 1350 out:
 1351         curthread->td_rw_rlocks++;
 1352         LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
 1353         LOCKSTAT_RECORD0(rw__downgrade, rw);
 1354 }
 1355 
 1356 void
 1357 __rw_downgrade(volatile uintptr_t *c, const char *file, int line)
 1358 {
 1359         struct rwlock *rw;
 1360 
 1361         rw = rwlock2rw(c);
 1362         __rw_downgrade_int(rw LOCK_FILE_LINE_ARG);
 1363 }
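
/*
 * A hedged sketch of the consumer-side downgrade pattern; the names are
 * hypothetical.  The update is made under the write lock, then the lock is
 * downgraded so the subsequent read happens without an unlock/relock window
 * while other readers are allowed in again.
 */
static struct rwlock example_table_lock;
static int example_generation;

static int
example_update_then_read(void)
{
	int gen;

	rw_wlock(&example_table_lock);
	example_generation++;
	/* Keep a single read hold; other readers may now proceed. */
	rw_downgrade(&example_table_lock);
	gen = example_generation;
	rw_runlock(&example_table_lock);
	return (gen);
}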
 1364 
 1365 #ifdef INVARIANT_SUPPORT
 1366 #ifndef INVARIANTS
 1367 #undef __rw_assert
 1368 #endif
 1369 
 1370 /*
 1371  * In the non-WITNESS case, rw_assert() can only detect that at least
 1372  * *some* thread owns an rlock, but it cannot guarantee that *this*
 1373  * thread owns an rlock.
 1374  */
 1375 void
 1376 __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
 1377 {
 1378         const struct rwlock *rw;
 1379 
 1380         if (SCHEDULER_STOPPED())
 1381                 return;
 1382 
 1383         rw = rwlock2rw(c);
 1384 
 1385         switch (what) {
 1386         case RA_LOCKED:
 1387         case RA_LOCKED | RA_RECURSED:
 1388         case RA_LOCKED | RA_NOTRECURSED:
 1389         case RA_RLOCKED:
 1390         case RA_RLOCKED | RA_RECURSED:
 1391         case RA_RLOCKED | RA_NOTRECURSED:
 1392 #ifdef WITNESS
 1393                 witness_assert(&rw->lock_object, what, file, line);
 1394 #else
 1395                 /*
 1396                  * If some other thread has a write lock or we have one
 1397                  * and are asserting a read lock, fail.  Also, if no one
 1398                  * has a lock at all, fail.
 1399                  */
 1400                 if (rw->rw_lock == RW_UNLOCKED ||
 1401                     (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED ||
 1402                     rw_wowner(rw) != curthread)))
 1403                         panic("Lock %s not %slocked @ %s:%d\n",
 1404                             rw->lock_object.lo_name, (what & RA_RLOCKED) ?
 1405                             "read " : "", file, line);
 1406 
 1407                 if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) {
 1408                         if (rw_recursed(rw)) {
 1409                                 if (what & RA_NOTRECURSED)
 1410                                         panic("Lock %s recursed @ %s:%d\n",
 1411                                             rw->lock_object.lo_name, file,
 1412                                             line);
 1413                         } else if (what & RA_RECURSED)
 1414                                 panic("Lock %s not recursed @ %s:%d\n",
 1415                                     rw->lock_object.lo_name, file, line);
 1416                 }
 1417 #endif
 1418                 break;
 1419         case RA_WLOCKED:
 1420         case RA_WLOCKED | RA_RECURSED:
 1421         case RA_WLOCKED | RA_NOTRECURSED:
 1422                 if (rw_wowner(rw) != curthread)
 1423                         panic("Lock %s not exclusively locked @ %s:%d\n",
 1424                             rw->lock_object.lo_name, file, line);
 1425                 if (rw_recursed(rw)) {
 1426                         if (what & RA_NOTRECURSED)
 1427                                 panic("Lock %s recursed @ %s:%d\n",
 1428                                     rw->lock_object.lo_name, file, line);
 1429                 } else if (what & RA_RECURSED)
 1430                         panic("Lock %s not recursed @ %s:%d\n",
 1431                             rw->lock_object.lo_name, file, line);
 1432                 break;
 1433         case RA_UNLOCKED:
 1434 #ifdef WITNESS
 1435                 witness_assert(&rw->lock_object, what, file, line);
 1436 #else
 1437                 /*
 1438                  * If we hold a write lock, fail.  We can't reliably
 1439                  * check whether we hold a read lock or not.
 1440                  */
 1441                 if (rw_wowner(rw) == curthread)
 1442                         panic("Lock %s exclusively locked @ %s:%d\n",
 1443                             rw->lock_object.lo_name, file, line);
 1444 #endif
 1445                 break;
 1446         default:
 1447                 panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
 1448                     line);
 1449         }
 1450 }
 1451 #endif /* INVARIANT_SUPPORT */
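
/*
 * A hedged sketch of the consumer-side use of the assertion machinery above;
 * the function and parameter names are hypothetical.  rw_assert(9) documents
 * a locking contract and, in INVARIANTS kernels, enforces it at run time.
 */
static void
example_update_locked(struct rwlock *lock, int *datum, int v)
{

	/* The caller must hold the write lock across this update. */
	rw_assert(lock, RA_WLOCKED);
	*datum = v;
}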
 1452 
 1453 #ifdef DDB
 1454 void
 1455 db_show_rwlock(const struct lock_object *lock)
 1456 {
 1457         const struct rwlock *rw;
 1458         struct thread *td;
 1459 
 1460         rw = (const struct rwlock *)lock;
 1461 
 1462         db_printf(" state: ");
 1463         if (rw->rw_lock == RW_UNLOCKED)
 1464                 db_printf("UNLOCKED\n");
 1465         else if (rw->rw_lock == RW_DESTROYED) {
 1466                 db_printf("DESTROYED\n");
 1467                 return;
 1468         } else if (rw->rw_lock & RW_LOCK_READ)
 1469                 db_printf("RLOCK: %ju locks\n",
 1470                     (uintmax_t)(RW_READERS(rw->rw_lock)));
 1471         else {
 1472                 td = rw_wowner(rw);
 1473                 db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
 1474                     td->td_tid, td->td_proc->p_pid, td->td_name);
 1475                 if (rw_recursed(rw))
 1476                         db_printf(" recursed: %u\n", rw->rw_recurse);
 1477         }
 1478         db_printf(" waiters: ");
 1479         switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
 1480         case RW_LOCK_READ_WAITERS:
 1481                 db_printf("readers\n");
 1482                 break;
 1483         case RW_LOCK_WRITE_WAITERS:
 1484                 db_printf("writers\n");
 1485                 break;
 1486         case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
 1487                 db_printf("readers and writers\n");
 1488                 break;
 1489         default:
 1490                 db_printf("none\n");
 1491                 break;
 1492         }
 1493 }
 1494 
 1495 #endif

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.