FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_rwlock.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 /*
   29  * Machine independent bits of reader/writer lock implementation.
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD$");
   34 
   35 #include "opt_ddb.h"
   36 #include "opt_hwpmc_hooks.h"
   37 #include "opt_no_adaptive_rwlocks.h"
   38 
   39 #include <sys/param.h>
   40 #include <sys/kdb.h>
   41 #include <sys/ktr.h>
   42 #include <sys/kernel.h>
   43 #include <sys/lock.h>
   44 #include <sys/mutex.h>
   45 #include <sys/proc.h>
   46 #include <sys/rwlock.h>
   47 #include <sys/sched.h>
   48 #include <sys/smp.h>
   49 #include <sys/sysctl.h>
   50 #include <sys/systm.h>
   51 #include <sys/turnstile.h>
   52 
   53 #include <machine/cpu.h>
   54 
   55 #if defined(SMP) && !defined(NO_ADAPTIVE_RWLOCKS)
   56 #define ADAPTIVE_RWLOCKS
   57 #endif
   58 
   59 #ifdef HWPMC_HOOKS
   60 #include <sys/pmckern.h>
   61 PMC_SOFT_DECLARE( , , lock, failed);
   62 #endif
   63 
   64 /*
   65  * Return the rwlock address when the lock cookie address is provided.
    66  * This functionality assumes that struct rwlock* has a member named rw_lock.
   67  */
   68 #define rwlock2rw(c)    (__containerof(c, struct rwlock, rw_lock))
   69 
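/*
 * Illustrative sketch (not part of kern_rwlock.c): the rwlock(9) macros in
 * <sys/rwlock.h> pass &rw->rw_lock (the "cookie") to the _rw_*_cookie() and
 * __rw_*() functions in this file, and rwlock2rw() recovers the containing
 * structure from that cookie.  The helper name below is hypothetical and
 * only spells out the idiom.
 */
static __inline struct rwlock *
example_cookie_to_rw(volatile uintptr_t *c)
{

	/* Equivalent to rwlock2rw(c). */
	return (__containerof(c, struct rwlock, rw_lock));
}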
   70 #ifdef DDB
   71 #include <ddb/ddb.h>
   72 
   73 static void     db_show_rwlock(const struct lock_object *lock);
   74 #endif
   75 static void     assert_rw(const struct lock_object *lock, int what);
   76 static void     lock_rw(struct lock_object *lock, uintptr_t how);
   77 #ifdef KDTRACE_HOOKS
   78 static int      owner_rw(const struct lock_object *lock, struct thread **owner);
   79 #endif
   80 static uintptr_t unlock_rw(struct lock_object *lock);
   81 
   82 struct lock_class lock_class_rw = {
   83         .lc_name = "rw",
   84         .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
   85         .lc_assert = assert_rw,
   86 #ifdef DDB
   87         .lc_ddb_show = db_show_rwlock,
   88 #endif
   89         .lc_lock = lock_rw,
   90         .lc_unlock = unlock_rw,
   91 #ifdef KDTRACE_HOOKS
   92         .lc_owner = owner_rw,
   93 #endif
   94 };
   95 
   96 #ifdef ADAPTIVE_RWLOCKS
   97 static int __read_frequently rowner_retries;
   98 static int __read_frequently rowner_loops;
   99 static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
  100     "rwlock debugging");
  101 SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
  102 SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
  103 
  104 static struct lock_delay_config __read_frequently rw_delay;
  105 
  106 SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_base, CTLFLAG_RW, &rw_delay.base,
  107     0, "");
  108 SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max,
  109     0, "");
  110 
  111 static void
  112 rw_lock_delay_init(void *arg __unused)
  113 {
  114 
  115         lock_delay_default_init(&rw_delay);
  116         rowner_retries = 10;
  117         rowner_loops = max(10000, rw_delay.max);
  118 }
  119 LOCK_DELAY_SYSINIT(rw_lock_delay_init);
  120 #endif
  121 
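/*
 * Descriptive note (not part of kern_rwlock.c): the declarations above create
 * the debug.rwlock sysctl tree, so the adaptive-spin behaviour can be
 * inspected and tuned at run time with sysctl(8), for example:
 *
 *	sysctl debug.rwlock.retry	# rowner_retries
 *	sysctl debug.rwlock.loops	# rowner_loops
 *	sysctl debug.rwlock.delay_base debug.rwlock.delay_max
 */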
  122 /*
  123  * Return a pointer to the owning thread if the lock is write-locked or
  124  * NULL if the lock is unlocked or read-locked.
  125  */
  126 
  127 #define lv_rw_wowner(v)                                                 \
  128         ((v) & RW_LOCK_READ ? NULL :                                    \
  129          (struct thread *)RW_OWNER((v)))
  130 
  131 #define rw_wowner(rw)   lv_rw_wowner(RW_READ_VALUE(rw))
  132 
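/*
 * Illustrative sketch (not part of kern_rwlock.c): the macros above decode
 * the single word of lock state.  When RW_LOCK_READ is set, the word holds
 * flag bits plus a reader count (an unlocked rwlock is encoded as read-locked
 * with zero readers); when it is clear, the word is the owning thread pointer
 * plus flag bits.  A debugging-style decode, using only macros already used
 * in this file, might look like this (the function name is hypothetical).
 */
static __inline void
example_decode_rw(struct rwlock *rw)
{
	uintptr_t v;

	v = RW_READ_VALUE(rw);
	if (v & RW_LOCK_READ)
		printf("read-locked (or unlocked), %d readers\n",
		    (int)RW_READERS(v));
	else
		printf("write-locked by thread %p\n",
		    (struct thread *)RW_OWNER(v));
}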
  133 /*
   134  * Returns true if the write owner is recursed.  Write ownership is not
   135  * assured here and should be checked beforehand.
  136  */
  137 #define rw_recursed(rw)         ((rw)->rw_recurse != 0)
  138 
  139 /*
   140  * Return true if curthread holds the lock.
  141  */
  142 #define rw_wlocked(rw)          (rw_wowner((rw)) == curthread)
  143 
  144 /*
   145  * Return a pointer to the thread that owns this lock and that should
   146  * receive any priority lent by threads blocked on it.  Currently this
  147  * is identical to rw_wowner().
  148  */
  149 #define rw_owner(rw)            rw_wowner(rw)
  150 
  151 #ifndef INVARIANTS
  152 #define __rw_assert(c, what, file, line)
  153 #endif
  154 
  155 void
  156 assert_rw(const struct lock_object *lock, int what)
  157 {
  158 
  159         rw_assert((const struct rwlock *)lock, what);
  160 }
  161 
  162 void
  163 lock_rw(struct lock_object *lock, uintptr_t how)
  164 {
  165         struct rwlock *rw;
  166 
  167         rw = (struct rwlock *)lock;
  168         if (how)
  169                 rw_rlock(rw);
  170         else
  171                 rw_wlock(rw);
  172 }
  173 
  174 uintptr_t
  175 unlock_rw(struct lock_object *lock)
  176 {
  177         struct rwlock *rw;
  178 
  179         rw = (struct rwlock *)lock;
  180         rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
  181         if (rw->rw_lock & RW_LOCK_READ) {
  182                 rw_runlock(rw);
  183                 return (1);
  184         } else {
  185                 rw_wunlock(rw);
  186                 return (0);
  187         }
  188 }
  189 
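/*
 * Illustrative sketch (not part of kern_rwlock.c): lock_rw() and unlock_rw()
 * let generic code (for example, sleep and condition-variable paths that know
 * the lock only as a struct lock_object) drop and reacquire an rwlock.  The
 * "how" value returned by lc_unlock (1 for a read lock, 0 for a write lock)
 * is handed back to lc_lock so the original mode is restored, roughly as
 * follows; the function name below is hypothetical.
 */
static void
example_lock_class_roundtrip(struct lock_object *lock)
{
	struct lock_class *class;
	uintptr_t how;

	class = LOCK_CLASS(lock);
	how = class->lc_unlock(lock);	/* returns 1 if it was read-locked */
	/* ... do work with the lock released ... */
	class->lc_lock(lock, how);	/* reacquire in the same mode */
}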
  190 #ifdef KDTRACE_HOOKS
  191 int
  192 owner_rw(const struct lock_object *lock, struct thread **owner)
  193 {
  194         const struct rwlock *rw = (const struct rwlock *)lock;
  195         uintptr_t x = rw->rw_lock;
  196 
  197         *owner = rw_wowner(rw);
  198         return ((x & RW_LOCK_READ) != 0 ?  (RW_READERS(x) != 0) :
  199             (*owner != NULL));
  200 }
  201 #endif
  202 
  203 void
  204 _rw_init_flags(volatile uintptr_t *c, const char *name, int opts)
  205 {
  206         struct rwlock *rw;
  207         int flags;
  208 
  209         rw = rwlock2rw(c);
  210 
  211         MPASS((opts & ~(RW_DUPOK | RW_NOPROFILE | RW_NOWITNESS | RW_QUIET |
  212             RW_RECURSE | RW_NEW)) == 0);
  213         ASSERT_ATOMIC_LOAD_PTR(rw->rw_lock,
  214             ("%s: rw_lock not aligned for %s: %p", __func__, name,
  215             &rw->rw_lock));
  216 
  217         flags = LO_UPGRADABLE;
  218         if (opts & RW_DUPOK)
  219                 flags |= LO_DUPOK;
  220         if (opts & RW_NOPROFILE)
  221                 flags |= LO_NOPROFILE;
  222         if (!(opts & RW_NOWITNESS))
  223                 flags |= LO_WITNESS;
  224         if (opts & RW_RECURSE)
  225                 flags |= LO_RECURSABLE;
  226         if (opts & RW_QUIET)
  227                 flags |= LO_QUIET;
  228         if (opts & RW_NEW)
  229                 flags |= LO_NEW;
  230 
  231         lock_init(&rw->lock_object, &lock_class_rw, name, NULL, flags);
  232         rw->rw_lock = RW_UNLOCKED;
  233         rw->rw_recurse = 0;
  234 }
  235 
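/*
 * Illustrative sketch (not part of kern_rwlock.c): the functions above back
 * the rwlock(9) consumer API.  A minimal use of that API, assuming a
 * hypothetical "foo" softc, might look like the following.
 */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/rwlock.h>

struct foo_softc {
	struct rwlock	sc_lock;	/* protects sc_count */
	int		sc_count;
};

static void
foo_attach(struct foo_softc *sc)
{

	/* Plain init; RW_RECURSE etc. could be passed via rw_init_flags(). */
	rw_init(&sc->sc_lock, "foo softc");
}

static int
foo_get_count(struct foo_softc *sc)
{
	int count;

	rw_rlock(&sc->sc_lock);		/* shared (read) lock */
	count = sc->sc_count;
	rw_runlock(&sc->sc_lock);
	return (count);
}

static void
foo_bump(struct foo_softc *sc)
{

	rw_wlock(&sc->sc_lock);		/* exclusive (write) lock */
	sc->sc_count++;
	rw_wunlock(&sc->sc_lock);
}

static void
foo_detach(struct foo_softc *sc)
{

	rw_destroy(&sc->sc_lock);	/* lock must be unlocked here */
}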
  236 void
  237 _rw_destroy(volatile uintptr_t *c)
  238 {
  239         struct rwlock *rw;
  240 
  241         rw = rwlock2rw(c);
  242 
  243         KASSERT(rw->rw_lock == RW_UNLOCKED, ("rw lock %p not unlocked", rw));
  244         KASSERT(rw->rw_recurse == 0, ("rw lock %p still recursed", rw));
  245         rw->rw_lock = RW_DESTROYED;
  246         lock_destroy(&rw->lock_object);
  247 }
  248 
  249 void
  250 rw_sysinit(void *arg)
  251 {
  252         struct rw_args *args;
  253 
  254         args = arg;
  255         rw_init_flags((struct rwlock *)args->ra_rw, args->ra_desc,
  256             args->ra_flags);
  257 }
  258 
  259 int
  260 _rw_wowned(const volatile uintptr_t *c)
  261 {
  262 
  263         return (rw_wowner(rwlock2rw(c)) == curthread);
  264 }
  265 
  266 void
  267 _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line)
  268 {
  269         struct rwlock *rw;
  270         uintptr_t tid, v;
  271 
  272         rw = rwlock2rw(c);
  273 
  274         KASSERT(kdb_active != 0 || SCHEDULER_STOPPED() ||
  275             !TD_IS_IDLETHREAD(curthread),
  276             ("rw_wlock() by idle thread %p on rwlock %s @ %s:%d",
  277             curthread, rw->lock_object.lo_name, file, line));
  278         KASSERT(rw->rw_lock != RW_DESTROYED,
  279             ("rw_wlock() of destroyed rwlock @ %s:%d", file, line));
  280         WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file,
  281             line, NULL);
  282         tid = (uintptr_t)curthread;
  283         v = RW_UNLOCKED;
  284         if (!_rw_write_lock_fetch(rw, &v, tid))
  285                 _rw_wlock_hard(rw, v, file, line);
  286         else
  287                 LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw,
  288                     0, 0, file, line, LOCKSTAT_WRITER);
  289 
  290         LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line);
  291         WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
  292         TD_LOCKS_INC(curthread);
  293 }
  294 
  295 int
  296 __rw_try_wlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
  297 {
  298         struct thread *td;
  299         uintptr_t tid, v;
  300         int rval;
  301         bool recursed;
  302 
  303         td = curthread;
  304         tid = (uintptr_t)td;
  305         if (SCHEDULER_STOPPED_TD(td))
  306                 return (1);
  307 
  308         KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
  309             ("rw_try_wlock() by idle thread %p on rwlock %s @ %s:%d",
  310             curthread, rw->lock_object.lo_name, file, line));
  311         KASSERT(rw->rw_lock != RW_DESTROYED,
  312             ("rw_try_wlock() of destroyed rwlock @ %s:%d", file, line));
  313 
  314         rval = 1;
  315         recursed = false;
  316         v = RW_UNLOCKED;
  317         for (;;) {
  318                 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
  319                         break;
  320                 if (v == RW_UNLOCKED)
  321                         continue;
  322                 if (v == tid && (rw->lock_object.lo_flags & LO_RECURSABLE)) {
  323                         rw->rw_recurse++;
  324                         atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
  325                         break;
  326                 }
  327                 rval = 0;
  328                 break;
  329         }
  330 
  331         LOCK_LOG_TRY("WLOCK", &rw->lock_object, 0, rval, file, line);
  332         if (rval) {
  333                 WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
  334                     file, line);
  335                 if (!recursed)
  336                         LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
  337                             rw, 0, 0, file, line, LOCKSTAT_WRITER);
  338                 TD_LOCKS_INC(curthread);
  339         }
  340         return (rval);
  341 }
  342 
  343 int
  344 __rw_try_wlock(volatile uintptr_t *c, const char *file, int line)
  345 {
  346         struct rwlock *rw;
  347 
  348         rw = rwlock2rw(c);
  349         return (__rw_try_wlock_int(rw LOCK_FILE_LINE_ARG));
  350 }
  351 
  352 void
  353 _rw_wunlock_cookie(volatile uintptr_t *c, const char *file, int line)
  354 {
  355         struct rwlock *rw;
  356 
  357         rw = rwlock2rw(c);
  358 
  359         KASSERT(rw->rw_lock != RW_DESTROYED,
  360             ("rw_wunlock() of destroyed rwlock @ %s:%d", file, line));
  361         __rw_assert(c, RA_WLOCKED, file, line);
  362         WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line);
  363         LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file,
  364             line);
  365 
  366 #ifdef LOCK_PROFILING
  367         _rw_wunlock_hard(rw, (uintptr_t)curthread, file, line);
  368 #else
  369         __rw_wunlock(rw, curthread, file, line);
  370 #endif
  371 
  372         TD_LOCKS_DEC(curthread);
  373 }
  374 
  375 /*
   376  * Determines whether a new reader can acquire a lock.  Succeeds if the
   377  * lock is unlocked or read-locked and has no write waiters or spinners.
   378  * Also succeeds if the reader already owns a read lock and the lock is
   379  * read-locked, to prevent deadlock from reader recursion.  Failing
   380  * otherwise prioritizes writers over readers.
  381  */
  382 static bool __always_inline
  383 __rw_can_read(struct thread *td, uintptr_t v, bool fp)
  384 {
  385 
  386         if ((v & (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER))
  387             == RW_LOCK_READ)
  388                 return (true);
  389         if (!fp && td->td_rw_rlocks && (v & RW_LOCK_READ))
  390                 return (true);
  391         return (false);
  392 }
  393 
  394 static bool __always_inline
  395 __rw_rlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp, bool fp
  396     LOCK_FILE_LINE_ARG_DEF)
  397 {
  398 
  399         /*
  400          * Handle the easy case.  If no other thread has a write
  401          * lock, then try to bump up the count of read locks.  Note
  402          * that we have to preserve the current state of the
  403          * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
  404          * read lock, then rw_lock must have changed, so restart
  405          * the loop.  Note that this handles the case of a
  406          * completely unlocked rwlock since such a lock is encoded
  407          * as a read lock with no waiters.
  408          */
  409         while (__rw_can_read(td, *vp, fp)) {
  410                 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, vp,
  411                         *vp + RW_ONE_READER)) {
  412                         if (LOCK_LOG_TEST(&rw->lock_object, 0))
  413                                 CTR4(KTR_LOCK,
  414                                     "%s: %p succeed %p -> %p", __func__,
  415                                     rw, (void *)*vp,
  416                                     (void *)(*vp + RW_ONE_READER));
  417                         td->td_rw_rlocks++;
  418                         return (true);
  419                 }
  420         }
  421         return (false);
  422 }
  423 
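/*
 * Illustrative sketch (not part of kern_rwlock.c): the loop above relies on
 * the atomic_fcmpset_*() idiom, where a failed compare-and-set refreshes the
 * caller's copy of the old value so the loop can re-evaluate without an extra
 * load.  In isolation the idiom looks like this; the function name and "flag"
 * parameter are hypothetical.
 */
static __inline bool
example_add_flag(volatile uintptr_t *p, uintptr_t flag)
{
	uintptr_t v;

	v = *p;				/* snapshot the current value */
	for (;;) {
		if (v & flag)		/* give up if already set */
			return (false);
		/* On failure, v is refreshed with the value seen in memory. */
		if (atomic_fcmpset_acq_ptr(p, &v, v | flag))
			return (true);
	}
}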
  424 static void __noinline
  425 __rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
  426     LOCK_FILE_LINE_ARG_DEF)
  427 {
  428         struct turnstile *ts;
  429         struct thread *owner;
  430 #ifdef ADAPTIVE_RWLOCKS
  431         int spintries = 0;
  432         int i, n;
  433 #endif
  434 #ifdef LOCK_PROFILING
  435         uint64_t waittime = 0;
  436         int contested = 0;
  437 #endif
  438 #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
  439         struct lock_delay_arg lda;
  440 #endif
  441 #ifdef KDTRACE_HOOKS
  442         u_int sleep_cnt = 0;
  443         int64_t sleep_time = 0;
  444         int64_t all_time = 0;
  445 #endif
  446 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
  447         uintptr_t state = 0;
  448         int doing_lockprof = 0;
  449 #endif
  450 
  451 #ifdef KDTRACE_HOOKS
  452         if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
  453                 if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
  454                         goto out_lockstat;
  455                 doing_lockprof = 1;
  456                 all_time -= lockstat_nsecs(&rw->lock_object);
  457                 state = v;
  458         }
  459 #endif
  460 #ifdef LOCK_PROFILING
  461         doing_lockprof = 1;
  462         state = v;
  463 #endif
  464 
  465         if (SCHEDULER_STOPPED())
  466                 return;
  467 
  468 #if defined(ADAPTIVE_RWLOCKS)
  469         lock_delay_arg_init(&lda, &rw_delay);
  470 #elif defined(KDTRACE_HOOKS)
  471         lock_delay_arg_init(&lda, NULL);
  472 #endif
  473 
  474 #ifdef HWPMC_HOOKS
  475         PMC_SOFT_CALL( , , lock, failed);
  476 #endif
  477         lock_profile_obtain_lock_failed(&rw->lock_object,
  478             &contested, &waittime);
  479 
  480         for (;;) {
  481                 if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
  482                         break;
  483 #ifdef KDTRACE_HOOKS
  484                 lda.spin_cnt++;
  485 #endif
  486 
  487 #ifdef ADAPTIVE_RWLOCKS
  488                 /*
  489                  * If the owner is running on another CPU, spin until
  490                  * the owner stops running or the state of the lock
  491                  * changes.
  492                  */
  493                 if ((v & RW_LOCK_READ) == 0) {
  494                         owner = (struct thread *)RW_OWNER(v);
  495                         if (TD_IS_RUNNING(owner)) {
  496                                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  497                                         CTR3(KTR_LOCK,
  498                                             "%s: spinning on %p held by %p",
  499                                             __func__, rw, owner);
  500                                 KTR_STATE1(KTR_SCHED, "thread",
  501                                     sched_tdname(curthread), "spinning",
  502                                     "lockname:\"%s\"", rw->lock_object.lo_name);
  503                                 do {
  504                                         lock_delay(&lda);
  505                                         v = RW_READ_VALUE(rw);
  506                                         owner = lv_rw_wowner(v);
  507                                 } while (owner != NULL && TD_IS_RUNNING(owner));
  508                                 KTR_STATE0(KTR_SCHED, "thread",
  509                                     sched_tdname(curthread), "running");
  510                                 continue;
  511                         }
  512                 } else {
  513                         if ((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) {
  514                                 MPASS(!__rw_can_read(td, v, false));
  515                                 lock_delay_spin(2);
  516                                 v = RW_READ_VALUE(rw);
  517                                 continue;
  518                         }
  519                         if (spintries < rowner_retries) {
  520                                 spintries++;
  521                                 KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
  522                                     "spinning", "lockname:\"%s\"",
  523                                     rw->lock_object.lo_name);
  524                                 n = RW_READERS(v);
  525                                 for (i = 0; i < rowner_loops; i += n) {
  526                                         lock_delay_spin(n);
  527                                         v = RW_READ_VALUE(rw);
  528                                         if (!(v & RW_LOCK_READ))
  529                                                 break;
  530                                         n = RW_READERS(v);
  531                                         if (n == 0)
  532                                                 break;
  533                                         if (__rw_can_read(td, v, false))
  534                                                 break;
  535                                 }
  536 #ifdef KDTRACE_HOOKS
  537                                 lda.spin_cnt += rowner_loops - i;
  538 #endif
  539                                 KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
  540                                     "running");
  541                                 if (i < rowner_loops)
  542                                         continue;
  543                         }
  544                 }
  545 #endif
  546 
  547                 /*
  548                  * Okay, now it's the hard case.  Some other thread already
  549                  * has a write lock or there are write waiters present,
  550                  * acquire the turnstile lock so we can begin the process
  551                  * of blocking.
  552                  */
  553                 ts = turnstile_trywait(&rw->lock_object);
  554 
  555                 /*
  556                  * The lock might have been released while we spun, so
  557                  * recheck its state and restart the loop if needed.
  558                  */
  559                 v = RW_READ_VALUE(rw);
  560 retry_ts:
  561                 if (((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) ||
  562                     __rw_can_read(td, v, false)) {
  563                         turnstile_cancel(ts);
  564                         continue;
  565                 }
  566 
  567                 owner = lv_rw_wowner(v);
  568 
  569 #ifdef ADAPTIVE_RWLOCKS
  570                 /*
  571                  * The current lock owner might have started executing
  572                  * on another CPU (or the lock could have changed
  573                  * owners) while we were waiting on the turnstile
  574                  * chain lock.  If so, drop the turnstile lock and try
  575                  * again.
  576                  */
  577                 if (owner != NULL) {
  578                         if (TD_IS_RUNNING(owner)) {
  579                                 turnstile_cancel(ts);
  580                                 continue;
  581                         }
  582                 }
  583 #endif
  584 
  585                 /*
  586                  * The lock is held in write mode or it already has waiters.
  587                  */
  588                 MPASS(!__rw_can_read(td, v, false));
  589 
  590                 /*
  591                  * If the RW_LOCK_READ_WAITERS flag is already set, then
  592                  * we can go ahead and block.  If it is not set then try
  593                  * to set it.  If we fail to set it drop the turnstile
  594                  * lock and restart the loop.
  595                  */
  596                 if (!(v & RW_LOCK_READ_WAITERS)) {
  597                         if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
  598                             v | RW_LOCK_READ_WAITERS))
  599                                 goto retry_ts;
  600                         if (LOCK_LOG_TEST(&rw->lock_object, 0))
  601                                 CTR2(KTR_LOCK, "%s: %p set read waiters flag",
  602                                     __func__, rw);
  603                 }
  604 
  605                 /*
  606                  * We were unable to acquire the lock and the read waiters
  607                  * flag is set, so we must block on the turnstile.
  608                  */
  609                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  610                         CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
  611                             rw);
  612 #ifdef KDTRACE_HOOKS
  613                 sleep_time -= lockstat_nsecs(&rw->lock_object);
  614 #endif
  615                 MPASS(owner == rw_owner(rw));
  616                 turnstile_wait(ts, owner, TS_SHARED_QUEUE);
  617 #ifdef KDTRACE_HOOKS
  618                 sleep_time += lockstat_nsecs(&rw->lock_object);
  619                 sleep_cnt++;
  620 #endif
  621                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  622                         CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
  623                             __func__, rw);
  624                 v = RW_READ_VALUE(rw);
  625         }
  626 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
  627         if (__predict_true(!doing_lockprof))
  628                 return;
  629 #endif
  630 #ifdef KDTRACE_HOOKS
  631         all_time += lockstat_nsecs(&rw->lock_object);
  632         if (sleep_time)
  633                 LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
  634                     LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
  635                     (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
  636 
  637         /* Record only the loops spinning and not sleeping. */
  638         if (lda.spin_cnt > sleep_cnt)
  639                 LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
  640                     LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
  641                     (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
  642 out_lockstat:
  643 #endif
  644         /*
  645          * TODO: acquire "owner of record" here.  Here be turnstile dragons
  646          * however.  turnstiles don't like owners changing between calls to
  647          * turnstile_wait() currently.
  648          */
  649         LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
  650             waittime, file, line, LOCKSTAT_READER);
  651 }
  652 
  653 void
  654 __rw_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
  655 {
  656         struct thread *td;
  657         uintptr_t v;
  658 
  659         td = curthread;
  660 
  661         KASSERT(kdb_active != 0 || SCHEDULER_STOPPED_TD(td) ||
  662             !TD_IS_IDLETHREAD(td),
  663             ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
  664             td, rw->lock_object.lo_name, file, line));
  665         KASSERT(rw->rw_lock != RW_DESTROYED,
  666             ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
  667         KASSERT(rw_wowner(rw) != td,
  668             ("rw_rlock: wlock already held for %s @ %s:%d",
  669             rw->lock_object.lo_name, file, line));
  670         WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);
  671 
  672         v = RW_READ_VALUE(rw);
  673         if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) ||
  674             !__rw_rlock_try(rw, td, &v, true LOCK_FILE_LINE_ARG)))
  675                 __rw_rlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
  676         else
  677                 lock_profile_obtain_lock_success(&rw->lock_object, 0, 0,
  678                     file, line);
  679 
  680         LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
  681         WITNESS_LOCK(&rw->lock_object, 0, file, line);
  682         TD_LOCKS_INC(curthread);
  683 }
  684 
  685 void
  686 __rw_rlock(volatile uintptr_t *c, const char *file, int line)
  687 {
  688         struct rwlock *rw;
  689 
  690         rw = rwlock2rw(c);
  691         __rw_rlock_int(rw LOCK_FILE_LINE_ARG);
  692 }
  693 
  694 int
  695 __rw_try_rlock_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
  696 {
  697         uintptr_t x;
  698 
  699         if (SCHEDULER_STOPPED())
  700                 return (1);
  701 
  702         KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
  703             ("rw_try_rlock() by idle thread %p on rwlock %s @ %s:%d",
  704             curthread, rw->lock_object.lo_name, file, line));
  705 
  706         x = rw->rw_lock;
  707         for (;;) {
  708                 KASSERT(rw->rw_lock != RW_DESTROYED,
  709                     ("rw_try_rlock() of destroyed rwlock @ %s:%d", file, line));
  710                 if (!(x & RW_LOCK_READ))
  711                         break;
  712                 if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &x, x + RW_ONE_READER)) {
  713                         LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file,
  714                             line);
  715                         WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line);
  716                         LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire,
  717                             rw, 0, 0, file, line, LOCKSTAT_READER);
  718                         TD_LOCKS_INC(curthread);
  719                         curthread->td_rw_rlocks++;
  720                         return (1);
  721                 }
  722         }
  723 
  724         LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 0, file, line);
  725         return (0);
  726 }
  727 
  728 int
  729 __rw_try_rlock(volatile uintptr_t *c, const char *file, int line)
  730 {
  731         struct rwlock *rw;
  732 
  733         rw = rwlock2rw(c);
  734         return (__rw_try_rlock_int(rw LOCK_FILE_LINE_ARG));
  735 }
  736 
  737 static bool __always_inline
  738 __rw_runlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp)
  739 {
  740 
  741         for (;;) {
  742                 if (RW_READERS(*vp) > 1 || !(*vp & RW_LOCK_WAITERS)) {
  743                         if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
  744                             *vp - RW_ONE_READER)) {
  745                                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  746                                         CTR4(KTR_LOCK,
  747                                             "%s: %p succeeded %p -> %p",
  748                                             __func__, rw, (void *)*vp,
  749                                             (void *)(*vp - RW_ONE_READER));
  750                                 td->td_rw_rlocks--;
  751                                 return (true);
  752                         }
  753                         continue;
  754                 }
  755                 break;
  756         }
  757         return (false);
  758 }
  759 
  760 static void __noinline
  761 __rw_runlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
  762     LOCK_FILE_LINE_ARG_DEF)
  763 {
  764         struct turnstile *ts;
  765         uintptr_t setv, queue;
  766 
  767         if (SCHEDULER_STOPPED())
  768                 return;
  769 
  770         if (__rw_runlock_try(rw, td, &v))
  771                 goto out_lockstat;
  772 
  773         /*
  774          * Ok, we know we have waiters and we think we are the
  775          * last reader, so grab the turnstile lock.
  776          */
  777         turnstile_chain_lock(&rw->lock_object);
  778         v = RW_READ_VALUE(rw);
  779         for (;;) {
  780                 if (__rw_runlock_try(rw, td, &v))
  781                         break;
  782 
  783                 MPASS(v & RW_LOCK_WAITERS);
  784 
  785                 /*
   786                  * Try to drop our lock, leaving the lock in an unlocked
  787                  * state.
  788                  *
  789                  * If you wanted to do explicit lock handoff you'd have to
  790                  * do it here.  You'd also want to use turnstile_signal()
  791                  * and you'd have to handle the race where a higher
  792                  * priority thread blocks on the write lock before the
  793                  * thread you wakeup actually runs and have the new thread
  794                  * "steal" the lock.  For now it's a lot simpler to just
  795                  * wakeup all of the waiters.
  796                  *
  797                  * As above, if we fail, then another thread might have
  798                  * acquired a read lock, so drop the turnstile lock and
  799                  * restart.
  800                  */
  801                 setv = RW_UNLOCKED;
  802                 queue = TS_SHARED_QUEUE;
  803                 if (v & RW_LOCK_WRITE_WAITERS) {
  804                         queue = TS_EXCLUSIVE_QUEUE;
  805                         setv |= (v & RW_LOCK_READ_WAITERS);
  806                 }
  807                 setv |= (v & RW_LOCK_WRITE_SPINNER);
  808                 if (!atomic_fcmpset_rel_ptr(&rw->rw_lock, &v, setv))
  809                         continue;
  810                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  811                         CTR2(KTR_LOCK, "%s: %p last succeeded with waiters",
  812                             __func__, rw);
  813 
  814                 /*
  815                  * Ok.  The lock is released and all that's left is to
  816                  * wake up the waiters.  Note that the lock might not be
  817                  * free anymore, but in that case the writers will just
  818                  * block again if they run before the new lock holder(s)
  819                  * release the lock.
  820                  */
  821                 ts = turnstile_lookup(&rw->lock_object);
  822                 MPASS(ts != NULL);
  823                 turnstile_broadcast(ts, queue);
  824                 turnstile_unpend(ts);
  825                 td->td_rw_rlocks--;
  826                 break;
  827         }
  828         turnstile_chain_unlock(&rw->lock_object);
  829 out_lockstat:
  830         LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER);
  831 }
  832 
  833 void
  834 _rw_runlock_cookie_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
  835 {
  836         struct thread *td;
  837         uintptr_t v;
  838 
  839         KASSERT(rw->rw_lock != RW_DESTROYED,
  840             ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
  841         __rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);
  842         WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
  843         LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);
  844 
  845         td = curthread;
  846         v = RW_READ_VALUE(rw);
  847 
  848         if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) ||
  849             !__rw_runlock_try(rw, td, &v)))
  850                 __rw_runlock_hard(rw, td, v LOCK_FILE_LINE_ARG);
  851         else
  852                 lock_profile_release_lock(&rw->lock_object);
  853 
  854         TD_LOCKS_DEC(curthread);
  855 }
  856 
  857 void
  858 _rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
  859 {
  860         struct rwlock *rw;
  861 
  862         rw = rwlock2rw(c);
  863         _rw_runlock_cookie_int(rw LOCK_FILE_LINE_ARG);
  864 }
  865 
  866 #ifdef ADAPTIVE_RWLOCKS
  867 static inline void
  868 rw_drop_critical(uintptr_t v, bool *in_critical, int *extra_work)
  869 {
  870 
  871         if (v & RW_LOCK_WRITE_SPINNER)
  872                 return;
  873         if (*in_critical) {
  874                 critical_exit();
  875                 *in_critical = false;
  876                 (*extra_work)--;
  877         }
  878 }
  879 #else
  880 #define rw_drop_critical(v, in_critical, extra_work) do { } while (0)
  881 #endif
  882 
  883 /*
  884  * This function is called when we are unable to obtain a write lock on the
  885  * first try.  This means that at least one other thread holds either a
  886  * read or write lock.
  887  */
  888 void
  889 __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
  890 {
  891         uintptr_t tid;
  892         struct rwlock *rw;
  893         struct turnstile *ts;
  894         struct thread *owner;
  895 #ifdef ADAPTIVE_RWLOCKS
  896         int spintries = 0;
  897         int i, n;
  898         enum { READERS, WRITER } sleep_reason = READERS;
  899         bool in_critical = false;
  900 #endif
  901         uintptr_t setv;
  902 #ifdef LOCK_PROFILING
  903         uint64_t waittime = 0;
  904         int contested = 0;
  905 #endif
  906 #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
  907         struct lock_delay_arg lda;
  908 #endif
  909 #ifdef KDTRACE_HOOKS
  910         u_int sleep_cnt = 0;
  911         int64_t sleep_time = 0;
  912         int64_t all_time = 0;
  913 #endif
  914 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
  915         uintptr_t state = 0;
  916         int doing_lockprof = 0;
  917 #endif
  918         int extra_work = 0;
  919 
  920         tid = (uintptr_t)curthread;
  921         rw = rwlock2rw(c);
  922 
  923 #ifdef KDTRACE_HOOKS
  924         if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
  925                 while (v == RW_UNLOCKED) {
  926                         if (_rw_write_lock_fetch(rw, &v, tid))
  927                                 goto out_lockstat;
  928                 }
  929                 extra_work = 1;
  930                 doing_lockprof = 1;
  931                 all_time -= lockstat_nsecs(&rw->lock_object);
  932                 state = v;
  933         }
  934 #endif
  935 #ifdef LOCK_PROFILING
  936         extra_work = 1;
  937         doing_lockprof = 1;
  938         state = v;
  939 #endif
  940 
  941         if (SCHEDULER_STOPPED())
  942                 return;
  943 
  944 #if defined(ADAPTIVE_RWLOCKS)
  945         lock_delay_arg_init(&lda, &rw_delay);
  946 #elif defined(KDTRACE_HOOKS)
  947         lock_delay_arg_init(&lda, NULL);
  948 #endif
  949         if (__predict_false(v == RW_UNLOCKED))
  950                 v = RW_READ_VALUE(rw);
  951 
  952         if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) {
  953                 KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
  954                     ("%s: recursing but non-recursive rw %s @ %s:%d\n",
  955                     __func__, rw->lock_object.lo_name, file, line));
  956                 rw->rw_recurse++;
  957                 atomic_set_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
  958                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
  959                         CTR2(KTR_LOCK, "%s: %p recursing", __func__, rw);
  960                 return;
  961         }
  962 
  963         if (LOCK_LOG_TEST(&rw->lock_object, 0))
  964                 CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__,
  965                     rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
  966 
  967 #ifdef HWPMC_HOOKS
  968         PMC_SOFT_CALL( , , lock, failed);
  969 #endif
  970         lock_profile_obtain_lock_failed(&rw->lock_object,
  971             &contested, &waittime);
  972 
  973         for (;;) {
  974                 if (v == RW_UNLOCKED) {
  975                         if (_rw_write_lock_fetch(rw, &v, tid))
  976                                 break;
  977                         continue;
  978                 }
  979 #ifdef KDTRACE_HOOKS
  980                 lda.spin_cnt++;
  981 #endif
  982 
  983 #ifdef ADAPTIVE_RWLOCKS
  984                 if (v == (RW_LOCK_READ | RW_LOCK_WRITE_SPINNER)) {
  985                         if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
  986                                 break;
  987                         continue;
  988                 }
  989 
  990                 /*
  991                  * If the lock is write locked and the owner is
  992                  * running on another CPU, spin until the owner stops
  993                  * running or the state of the lock changes.
  994                  */
  995                 if (!(v & RW_LOCK_READ)) {
  996                         rw_drop_critical(v, &in_critical, &extra_work);
  997                         sleep_reason = WRITER;
  998                         owner = lv_rw_wowner(v);
  999                         if (!TD_IS_RUNNING(owner))
 1000                                 goto ts;
 1001                         if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1002                                 CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
 1003                                     __func__, rw, owner);
 1004                         KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 1005                             "spinning", "lockname:\"%s\"",
 1006                             rw->lock_object.lo_name);
 1007                         do {
 1008                                 lock_delay(&lda);
 1009                                 v = RW_READ_VALUE(rw);
 1010                                 owner = lv_rw_wowner(v);
 1011                         } while (owner != NULL && TD_IS_RUNNING(owner));
 1012                         KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
 1013                             "running");
 1014                         continue;
 1015                 } else if (RW_READERS(v) > 0) {
 1016                         sleep_reason = READERS;
 1017                         if (spintries == rowner_retries)
 1018                                 goto ts;
 1019                         if (!(v & RW_LOCK_WRITE_SPINNER)) {
 1020                                 if (!in_critical) {
 1021                                         critical_enter();
 1022                                         in_critical = true;
 1023                                         extra_work++;
 1024                                 }
 1025                                 if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
 1026                                     v | RW_LOCK_WRITE_SPINNER)) {
 1027                                         critical_exit();
 1028                                         in_critical = false;
 1029                                         extra_work--;
 1030                                         continue;
 1031                                 }
 1032                         }
 1033                         spintries++;
 1034                         KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 1035                             "spinning", "lockname:\"%s\"",
 1036                             rw->lock_object.lo_name);
 1037                         n = RW_READERS(v);
 1038                         for (i = 0; i < rowner_loops; i += n) {
 1039                                 lock_delay_spin(n);
 1040                                 v = RW_READ_VALUE(rw);
 1041                                 if (!(v & RW_LOCK_WRITE_SPINNER))
 1042                                         break;
 1043                                 if (!(v & RW_LOCK_READ))
 1044                                         break;
 1045                                 n = RW_READERS(v);
 1046                                 if (n == 0)
 1047                                         break;
 1048                         }
 1049 #ifdef KDTRACE_HOOKS
 1050                         lda.spin_cnt += i;
 1051 #endif
 1052                         KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
 1053                             "running");
 1054                         if (i < rowner_loops)
 1055                                 continue;
 1056                 }
 1057 ts:
 1058 #endif
 1059                 ts = turnstile_trywait(&rw->lock_object);
 1060                 v = RW_READ_VALUE(rw);
 1061 retry_ts:
 1062                 owner = lv_rw_wowner(v);
 1063 
 1064 #ifdef ADAPTIVE_RWLOCKS
 1065                 /*
 1066                  * The current lock owner might have started executing
 1067                  * on another CPU (or the lock could have changed
 1068                  * owners) while we were waiting on the turnstile
 1069                  * chain lock.  If so, drop the turnstile lock and try
 1070                  * again.
 1071                  */
 1072                 if (owner != NULL) {
 1073                         if (TD_IS_RUNNING(owner)) {
 1074                                 turnstile_cancel(ts);
 1075                                 rw_drop_critical(v, &in_critical, &extra_work);
 1076                                 continue;
 1077                         }
 1078                 } else if (RW_READERS(v) > 0 && sleep_reason == WRITER) {
 1079                         turnstile_cancel(ts);
 1080                         rw_drop_critical(v, &in_critical, &extra_work);
 1081                         continue;
 1082                 }
 1083 #endif
 1084                 /*
  1085                  * Check the waiters flags on this rwlock.
  1086                  * If the lock was released without leaving any pending
  1087                  * waiters queue, simply try to acquire it.
  1088                  * If a pending waiters queue is present, claim lock
  1089                  * ownership and maintain the pending queue.
 1090                  */
 1091                 setv = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
 1092                 if ((v & ~setv) == RW_UNLOCKED) {
 1093                         setv &= ~RW_LOCK_WRITE_SPINNER;
 1094                         if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid | setv)) {
 1095                                 if (setv)
 1096                                         turnstile_claim(ts);
 1097                                 else
 1098                                         turnstile_cancel(ts);
 1099                                 break;
 1100                         }
 1101                         goto retry_ts;
 1102                 }
 1103 
 1104 #ifdef ADAPTIVE_RWLOCKS
 1105                 if (in_critical) {
 1106                         if ((v & RW_LOCK_WRITE_SPINNER) ||
 1107                             !((v & RW_LOCK_WRITE_WAITERS))) {
 1108                                 setv = v & ~RW_LOCK_WRITE_SPINNER;
 1109                                 setv |= RW_LOCK_WRITE_WAITERS;
 1110                                 if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, setv))
 1111                                         goto retry_ts;
 1112                         }
 1113                         critical_exit();
 1114                         in_critical = false;
 1115                         extra_work--;
 1116                 } else {
 1117 #endif
 1118                         /*
 1119                          * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
 1120                          * set it.  If we fail to set it, then loop back and try
 1121                          * again.
 1122                          */
 1123                         if (!(v & RW_LOCK_WRITE_WAITERS)) {
 1124                                 if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
 1125                                     v | RW_LOCK_WRITE_WAITERS))
 1126                                         goto retry_ts;
 1127                                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1128                                         CTR2(KTR_LOCK, "%s: %p set write waiters flag",
 1129                                             __func__, rw);
 1130                         }
 1131 #ifdef ADAPTIVE_RWLOCKS
 1132                 }
 1133 #endif
 1134                 /*
 1135                  * We were unable to acquire the lock and the write waiters
 1136                  * flag is set, so we must block on the turnstile.
 1137                  */
 1138                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1139                         CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
 1140                             rw);
 1141 #ifdef KDTRACE_HOOKS
 1142                 sleep_time -= lockstat_nsecs(&rw->lock_object);
 1143 #endif
 1144                 MPASS(owner == rw_owner(rw));
 1145                 turnstile_wait(ts, owner, TS_EXCLUSIVE_QUEUE);
 1146 #ifdef KDTRACE_HOOKS
 1147                 sleep_time += lockstat_nsecs(&rw->lock_object);
 1148                 sleep_cnt++;
 1149 #endif
 1150                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1151                         CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
 1152                             __func__, rw);
 1153 #ifdef ADAPTIVE_RWLOCKS
 1154                 spintries = 0;
 1155 #endif
 1156                 v = RW_READ_VALUE(rw);
 1157         }
 1158         if (__predict_true(!extra_work))
 1159                 return;
 1160 #ifdef ADAPTIVE_RWLOCKS
 1161         if (in_critical)
 1162                 critical_exit();
 1163 #endif
 1164 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 1165         if (__predict_true(!doing_lockprof))
 1166                 return;
 1167 #endif
 1168 #ifdef KDTRACE_HOOKS
 1169         all_time += lockstat_nsecs(&rw->lock_object);
 1170         if (sleep_time)
 1171                 LOCKSTAT_RECORD4(rw__block, rw, sleep_time,
 1172                     LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
 1173                     (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 1174 
 1175         /* Record only the loops spinning and not sleeping. */
 1176         if (lda.spin_cnt > sleep_cnt)
 1177                 LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
 1178                     LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
 1179                     (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
 1180 out_lockstat:
 1181 #endif
 1182         LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
 1183             waittime, file, line, LOCKSTAT_WRITER);
 1184 }
 1185 
 1186 /*
 1187  * This function is called if lockstat is active or the first try at releasing
  1188  * a write lock failed.  The latter means that the lock is recursed or that
  1189  * one of the two waiter bits is set, indicating that at least one thread
  1190  * is waiting on this lock.
 1191  */
 1192 void
 1193 __rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
 1194 {
 1195         struct rwlock *rw;
 1196         struct turnstile *ts;
 1197         uintptr_t tid, setv;
 1198         int queue;
 1199 
 1200         tid = (uintptr_t)curthread;
 1201         if (SCHEDULER_STOPPED())
 1202                 return;
 1203 
 1204         rw = rwlock2rw(c);
 1205         if (__predict_false(v == tid))
 1206                 v = RW_READ_VALUE(rw);
 1207 
 1208         if (v & RW_LOCK_WRITER_RECURSED) {
 1209                 if (--(rw->rw_recurse) == 0)
 1210                         atomic_clear_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
 1211                 if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1212                         CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw);
 1213                 return;
 1214         }
 1215 
 1216         LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_WRITER);
 1217         if (v == tid && _rw_write_unlock(rw, tid))
 1218                 return;
 1219 
 1220         KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS),
 1221             ("%s: neither of the waiter flags are set", __func__));
 1222 
 1223         if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1224                 CTR2(KTR_LOCK, "%s: %p contested", __func__, rw);
 1225 
 1226         turnstile_chain_lock(&rw->lock_object);
 1227 
 1228         /*
 1229          * Use the same algo as sx locks for now.  Prefer waking up shared
 1230          * waiters if we have any over writers.  This is probably not ideal.
 1231          *
 1232          * 'v' is the value we are going to write back to rw_lock.  If we
 1233          * have waiters on both queues, we need to preserve the state of
 1234          * the waiter flag for the queue we don't wake up.  For now this is
 1235          * hardcoded for the algorithm mentioned above.
 1236          *
 1237          * In the case of both readers and writers waiting we wakeup the
 1238          * readers but leave the RW_LOCK_WRITE_WAITERS flag set.  If a
 1239          * new writer comes in before a reader it will claim the lock up
 1240          * above.  There is probably a potential priority inversion in
 1241          * there that could be worked around either by waking both queues
 1242          * of waiters or doing some complicated lock handoff gymnastics.
 1243          */
 1244         setv = RW_UNLOCKED;
 1245         v = RW_READ_VALUE(rw);
 1246         queue = TS_SHARED_QUEUE;
 1247         if (v & RW_LOCK_WRITE_WAITERS) {
 1248                 queue = TS_EXCLUSIVE_QUEUE;
 1249                 setv |= (v & RW_LOCK_READ_WAITERS);
 1250         }
 1251         atomic_store_rel_ptr(&rw->rw_lock, setv);
 1252 
 1253         /* Wake up all waiters for the specific queue. */
 1254         if (LOCK_LOG_TEST(&rw->lock_object, 0))
 1255                 CTR3(KTR_LOCK, "%s: %p waking up %s waiters", __func__, rw,
 1256                     queue == TS_SHARED_QUEUE ? "read" : "write");
 1257 
 1258         ts = turnstile_lookup(&rw->lock_object);
 1259         MPASS(ts != NULL);
 1260         turnstile_broadcast(ts, queue);
 1261         turnstile_unpend(ts);
 1262         turnstile_chain_unlock(&rw->lock_object);
 1263 }
 1264 
 1265 /*
 1266  * Attempt to do a non-blocking upgrade from a read lock to a write
 1267  * lock.  This will only succeed if this thread holds a single read
 1268  * lock.  Returns true if the upgrade succeeded and false otherwise.
 1269  */
 1270 int
 1271 __rw_try_upgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 1272 {
 1273         uintptr_t v, setv, tid;
 1274         struct turnstile *ts;
 1275         int success;
 1276 
 1277         if (SCHEDULER_STOPPED())
 1278                 return (1);
 1279 
 1280         KASSERT(rw->rw_lock != RW_DESTROYED,
 1281             ("rw_try_upgrade() of destroyed rwlock @ %s:%d", file, line));
 1282         __rw_assert(&rw->rw_lock, RA_RLOCKED, file, line);
 1283 
 1284         /*
 1285          * Attempt to switch from one reader to a writer.  If there
 1286          * are any write waiters, then we will have to lock the
 1287          * turnstile first to prevent races with another writer
 1288          * calling turnstile_wait() before we have claimed this
 1289          * turnstile.  So, do the simple case of no waiters first.
 1290          */
 1291         tid = (uintptr_t)curthread;
 1292         success = 0;
 1293         v = RW_READ_VALUE(rw);
 1294         for (;;) {
 1295                 if (RW_READERS(v) > 1)
 1296                         break;
 1297                 if (!(v & RW_LOCK_WAITERS)) {
 1298                         success = atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid);
 1299                         if (!success)
 1300                                 continue;
 1301                         break;
 1302                 }
 1303 
 1304                 /*
 1305                  * Ok, we think we have waiters, so lock the turnstile.
 1306                  */
 1307                 ts = turnstile_trywait(&rw->lock_object);
 1308                 v = RW_READ_VALUE(rw);
 1309 retry_ts:
 1310                 if (RW_READERS(v) > 1) {
 1311                         turnstile_cancel(ts);
 1312                         break;
 1313                 }
 1314                 /*
 1315                  * Try to switch from one reader to a writer again.  This time
 1316                  * we honor the current state of the waiters flags.
 1317                  * If we obtain the lock with the flags set, then claim
 1318                  * ownership of the turnstile.
 1319                  */
 1320                 setv = tid | (v & RW_LOCK_WAITERS);
 1321                 success = atomic_fcmpset_ptr(&rw->rw_lock, &v, setv);
 1322                 if (success) {
 1323                         if (v & RW_LOCK_WAITERS)
 1324                                 turnstile_claim(ts);
 1325                         else
 1326                                 turnstile_cancel(ts);
 1327                         break;
 1328                 }
 1329                 goto retry_ts;
 1330         }
 1331         LOCK_LOG_TRY("WUPGRADE", &rw->lock_object, 0, success, file, line);
 1332         if (success) {
 1333                 curthread->td_rw_rlocks--;
 1334                 WITNESS_UPGRADE(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK,
 1335                     file, line);
 1336                 LOCKSTAT_RECORD0(rw__upgrade, rw);
 1337         }
 1338         return (success);
 1339 }
 1340 
 1341 int
 1342 __rw_try_upgrade(volatile uintptr_t *c, const char *file, int line)
 1343 {
 1344         struct rwlock *rw;
 1345 
 1346         rw = rwlock2rw(c);
 1347         return (__rw_try_upgrade_int(rw LOCK_FILE_LINE_ARG));
 1348 }
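
/*
 * Illustrative use of rw_try_upgrade(9), not part of this file: a
 * hypothetical lookup-or-insert routine that starts out with a read
 * lock and upgrades only when it has to modify the structure.  Because
 * the upgrade can fail (another reader or a waiter race), the fallback
 * is to drop the read lock, take the write lock, and redo the lookup.
 * The names foo_lock, struct foo, foo_find() and foo_insert() are
 * made up for this sketch.
 */
#if 0
static struct rwlock foo_lock;

static int
foo_lookup_or_insert(struct foo_head *head, struct foo *newf)
{

	rw_rlock(&foo_lock);
	if (foo_find(head, newf->f_key) != NULL) {
		rw_runlock(&foo_lock);
		return (0);
	}
	if (!rw_try_upgrade(&foo_lock)) {
		/*
		 * Upgrade failed: drop the shared lock and relock
		 * exclusively.  The structure may have changed while
		 * the lock was not held, so repeat the lookup.
		 */
		rw_runlock(&foo_lock);
		rw_wlock(&foo_lock);
		if (foo_find(head, newf->f_key) != NULL) {
			rw_wunlock(&foo_lock);
			return (0);
		}
	}
	foo_insert(head, newf);
	rw_wunlock(&foo_lock);
	return (1);
}
#endif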
 1349 
 1350 /*
 1351  * Downgrade a write lock into a single read lock.
 1352  */
 1353 void
 1354 __rw_downgrade_int(struct rwlock *rw LOCK_FILE_LINE_ARG_DEF)
 1355 {
 1356         struct turnstile *ts;
 1357         uintptr_t tid, v;
 1358         int rwait, wwait;
 1359 
 1360         if (SCHEDULER_STOPPED())
 1361                 return;
 1362 
 1363         KASSERT(rw->rw_lock != RW_DESTROYED,
 1364             ("rw_downgrade() of destroyed rwlock @ %s:%d", file, line));
 1365         __rw_assert(&rw->rw_lock, RA_WLOCKED | RA_NOTRECURSED, file, line);
 1366 #ifndef INVARIANTS
 1367         if (rw_recursed(rw))
 1368                 panic("downgrade of a recursed lock");
 1369 #endif
 1370 
 1371         WITNESS_DOWNGRADE(&rw->lock_object, 0, file, line);
 1372 
 1373         /*
 1374          * Convert from a writer to a single reader.  First we handle
 1375          * the easy case with no waiters.  If there are any waiters, we
 1376          * lock the turnstile and "disown" the lock.
 1377          */
 1378         tid = (uintptr_t)curthread;
 1379         if (atomic_cmpset_rel_ptr(&rw->rw_lock, tid, RW_READERS_LOCK(1)))
 1380                 goto out;
 1381 
 1382         /*
 1383          * Ok, we think we have waiters, so lock the turnstile so we can
 1384          * read the waiter flags without any races.
 1385          */
 1386         turnstile_chain_lock(&rw->lock_object);
 1387         v = rw->rw_lock & RW_LOCK_WAITERS;
 1388         rwait = v & RW_LOCK_READ_WAITERS;
 1389         wwait = v & RW_LOCK_WRITE_WAITERS;
 1390         MPASS(rwait | wwait);
 1391 
 1392         /*
 1393          * Downgrade from a write lock while preserving waiters flag
 1394          * and give up ownership of the turnstile.
 1395          */
 1396         ts = turnstile_lookup(&rw->lock_object);
 1397         MPASS(ts != NULL);
 1398         if (!wwait)
 1399                 v &= ~RW_LOCK_READ_WAITERS;
 1400         atomic_store_rel_ptr(&rw->rw_lock, RW_READERS_LOCK(1) | v);
 1401         /*
 1402          * Wake other readers if there are no writers pending.  Otherwise they
 1403          * won't be able to acquire the lock anyway.
 1404          */
 1405         if (rwait && !wwait) {
 1406                 turnstile_broadcast(ts, TS_SHARED_QUEUE);
 1407                 turnstile_unpend(ts);
 1408         } else
 1409                 turnstile_disown(ts);
 1410         turnstile_chain_unlock(&rw->lock_object);
 1411 out:
 1412         curthread->td_rw_rlocks++;
 1413         LOCK_LOG_LOCK("WDOWNGRADE", &rw->lock_object, 0, 0, file, line);
 1414         LOCKSTAT_RECORD0(rw__downgrade, rw);
 1415 }
 1416 
 1417 void
 1418 __rw_downgrade(volatile uintptr_t *c, const char *file, int line)
 1419 {
 1420         struct rwlock *rw;
 1421 
 1422         rw = rwlock2rw(c);
 1423         __rw_downgrade_int(rw LOCK_FILE_LINE_ARG);
 1424 }
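
/*
 * Illustrative use of rw_downgrade(9), not part of this file: a
 * hypothetical routine that rebuilds a table under the exclusive lock
 * and then downgrades to a single read lock so it can keep scanning the
 * table while other readers are admitted again.  foo_lock, foo_rebuild()
 * and foo_scan() are made-up names.
 */
#if 0
static void
foo_rebuild_and_scan(struct foo_head *head)
{

	rw_wlock(&foo_lock);
	foo_rebuild(head);		/* needs exclusive access */
	rw_downgrade(&foo_lock);	/* now holds a single read lock */
	foo_scan(head);			/* other readers may run concurrently */
	rw_runlock(&foo_lock);
}
#endif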
 1425 
 1426 #ifdef INVARIANT_SUPPORT
 1427 #ifndef INVARIANTS
 1428 #undef __rw_assert
 1429 #endif
 1430 
 1431 /*
 1432  * In the non-WITNESS case, rw_assert() can only detect that at least
 1433  * *some* thread owns an rlock, but it cannot guarantee that *this*
 1434  * thread owns an rlock.
 1435  */
 1436 void
 1437 __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
 1438 {
 1439         const struct rwlock *rw;
 1440 
 1441         if (SCHEDULER_STOPPED())
 1442                 return;
 1443 
 1444         rw = rwlock2rw(c);
 1445 
 1446         switch (what) {
 1447         case RA_LOCKED:
 1448         case RA_LOCKED | RA_RECURSED:
 1449         case RA_LOCKED | RA_NOTRECURSED:
 1450         case RA_RLOCKED:
 1451         case RA_RLOCKED | RA_RECURSED:
 1452         case RA_RLOCKED | RA_NOTRECURSED:
 1453 #ifdef WITNESS
 1454                 witness_assert(&rw->lock_object, what, file, line);
 1455 #else
 1456                 /*
 1457                  * If some other thread has a write lock or we have one
 1458                  * and are asserting a read lock, fail.  Also, if no one
 1459                  * has a lock at all, fail.
 1460                  */
 1461                 if (rw->rw_lock == RW_UNLOCKED ||
 1462                     (!(rw->rw_lock & RW_LOCK_READ) && (what & RA_RLOCKED ||
 1463                     rw_wowner(rw) != curthread)))
 1464                         panic("Lock %s not %slocked @ %s:%d\n",
 1465                             rw->lock_object.lo_name, (what & RA_RLOCKED) ?
 1466                             "read " : "", file, line);
 1467 
 1468                 if (!(rw->rw_lock & RW_LOCK_READ) && !(what & RA_RLOCKED)) {
 1469                         if (rw_recursed(rw)) {
 1470                                 if (what & RA_NOTRECURSED)
 1471                                         panic("Lock %s recursed @ %s:%d\n",
 1472                                             rw->lock_object.lo_name, file,
 1473                                             line);
 1474                         } else if (what & RA_RECURSED)
 1475                                 panic("Lock %s not recursed @ %s:%d\n",
 1476                                     rw->lock_object.lo_name, file, line);
 1477                 }
 1478 #endif
 1479                 break;
 1480         case RA_WLOCKED:
 1481         case RA_WLOCKED | RA_RECURSED:
 1482         case RA_WLOCKED | RA_NOTRECURSED:
 1483                 if (rw_wowner(rw) != curthread)
 1484                         panic("Lock %s not exclusively locked @ %s:%d\n",
 1485                             rw->lock_object.lo_name, file, line);
 1486                 if (rw_recursed(rw)) {
 1487                         if (what & RA_NOTRECURSED)
 1488                                 panic("Lock %s recursed @ %s:%d\n",
 1489                                     rw->lock_object.lo_name, file, line);
 1490                 } else if (what & RA_RECURSED)
 1491                         panic("Lock %s not recursed @ %s:%d\n",
 1492                             rw->lock_object.lo_name, file, line);
 1493                 break;
 1494         case RA_UNLOCKED:
 1495 #ifdef WITNESS
 1496                 witness_assert(&rw->lock_object, what, file, line);
 1497 #else
 1498                 /*
 1499                  * If we hold a write lock fail.  We can't reliably check
  1500                  * If we hold a write lock, fail.  We can't reliably check
 1501                  */
 1502                 if (rw_wowner(rw) == curthread)
 1503                         panic("Lock %s exclusively locked @ %s:%d\n",
 1504                             rw->lock_object.lo_name, file, line);
 1505 #endif
 1506                 break;
 1507         default:
 1508                 panic("Unknown rw lock assertion: %d @ %s:%d", what, file,
 1509                     line);
 1510         }
 1511 }
 1512 #endif /* INVARIANT_SUPPORT */
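
/*
 * Illustrative use of rw_assert(9), not part of this file: callers
 * typically enforce their locking protocol by asserting the expected
 * lock state on entry.  Note the caveat documented above: without
 * WITNESS, RA_RLOCKED/RA_LOCKED can only prove that *some* thread holds
 * a read lock.  foo_lock and the foo_*() routines are made-up names.
 */
#if 0
static void
foo_update(struct foo *f)
{

	rw_assert(&foo_lock, RA_WLOCKED);	/* must be held exclusively */
	f->f_gen++;
}

static int
foo_read(const struct foo *f)
{

	rw_assert(&foo_lock, RA_LOCKED);	/* read or write lock is fine */
	return (f->f_gen);
}
#endif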
 1513 
 1514 #ifdef DDB
 1515 void
 1516 db_show_rwlock(const struct lock_object *lock)
 1517 {
 1518         const struct rwlock *rw;
 1519         struct thread *td;
 1520 
 1521         rw = (const struct rwlock *)lock;
 1522 
 1523         db_printf(" state: ");
 1524         if (rw->rw_lock == RW_UNLOCKED)
 1525                 db_printf("UNLOCKED\n");
 1526         else if (rw->rw_lock == RW_DESTROYED) {
 1527                 db_printf("DESTROYED\n");
 1528                 return;
 1529         } else if (rw->rw_lock & RW_LOCK_READ)
 1530                 db_printf("RLOCK: %ju locks\n",
 1531                     (uintmax_t)(RW_READERS(rw->rw_lock)));
 1532         else {
 1533                 td = rw_wowner(rw);
 1534                 db_printf("WLOCK: %p (tid %d, pid %d, \"%s\")\n", td,
 1535                     td->td_tid, td->td_proc->p_pid, td->td_name);
 1536                 if (rw_recursed(rw))
 1537                         db_printf(" recursed: %u\n", rw->rw_recurse);
 1538         }
 1539         db_printf(" waiters: ");
 1540         switch (rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS)) {
 1541         case RW_LOCK_READ_WAITERS:
 1542                 db_printf("readers\n");
 1543                 break;
 1544         case RW_LOCK_WRITE_WAITERS:
 1545                 db_printf("writers\n");
 1546                 break;
 1547         case RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS:
 1548                 db_printf("readers and writers\n");
 1549                 break;
 1550         default:
 1551                 db_printf("none\n");
 1552                 break;
 1553         }
 1554 }
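
/*
 * Example of the output produced by the routine above when a rwlock is
 * inspected from ddb(4)'s "show lock" command for a write-locked lock
 * with queued waiters; the pointer, tid, pid and thread name below are
 * invented:
 *
 *  state: WLOCK: 0xfffff80003a1c740 (tid 100123, pid 42, "foo_daemon")
 *  waiters: readers and writers
 */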
 1555 
 1556 #endif
