FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_rwlock.c


/*      $NetBSD: kern_rwlock.c,v 1.67 2023/01/27 09:28:41 ozaki-r Exp $ */

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *      Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *          Richard McDougall.
 *
 * The NetBSD implementation differs from that described in the book, in
 * that the locks are partially adaptive.  Lock waiters spin-wait while a
 * lock is write held and the holder is still running on a CPU.  The method
 * of choosing which threads to awaken when a lock is released also differs,
 * mainly to take account of the partially adaptive behaviour.
 */
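
/*
 * Typical consumer usage, as a minimal sketch (the public entry points
 * are documented in rwlock(9)):
 *
 *      krwlock_t lock;
 *
 *      rw_init(&lock);
 *      rw_enter(&lock, RW_READER);     shared hold; may spin or sleep
 *      ... read the protected data ...
 *      rw_exit(&lock);
 *      rw_destroy(&lock);
 */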

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.67 2023/01/27 09:28:41 ozaki-r Exp $");

#include "opt_lockdebug.h"

#define __RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/lock.h>
#include <sys/pserialize.h>

#include <dev/lockstat.h>

#include <machine/rwlock.h>

/*
 * LOCKDEBUG
 */

#define RW_DEBUG_P(rw)          (((rw)->rw_owner & RW_NODEBUG) == 0)

#define RW_WANTLOCK(rw, op) \
    LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \
        (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define RW_LOCKED(rw, op) \
    LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL, \
        (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define RW_UNLOCKED(rw, op) \
    LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \
        (uintptr_t)__builtin_return_address(0), op == RW_READER);

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)
#define RW_ASSERT(rw, cond) \
do { \
        if (__predict_false(!(cond))) \
                rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
} while (/* CONSTCOND */ 0)
#else
#define RW_ASSERT(rw, cond)     /* nothing */
#endif  /* DIAGNOSTIC */

/*
 * Memory barriers.
 */
#ifdef __HAVE_ATOMIC_AS_MEMBAR
#define RW_MEMBAR_ACQUIRE()
#define RW_MEMBAR_RELEASE()
#define RW_MEMBAR_PRODUCER()
#else
#define RW_MEMBAR_ACQUIRE()             membar_acquire()
#define RW_MEMBAR_RELEASE()             membar_release()
#define RW_MEMBAR_PRODUCER()            membar_producer()
#endif
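
/*
 * Where the platform's atomic operations already imply full barriers
 * (__HAVE_ATOMIC_AS_MEMBAR), these expand to nothing.  Otherwise an
 * acquire barrier is issued just after taking the lock and a release
 * barrier just before dropping it, so accesses within the critical
 * section cannot drift past either end.
 */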

/*
 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 */
#ifdef LOCKDEBUG
#undef  __HAVE_RW_STUBS
#endif

#ifndef __HAVE_RW_STUBS
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
__strong_alias(rw_tryenter,rw_vector_tryenter);
#endif

static void     rw_abort(const char *, size_t, krwlock_t *, const char *);
static void     rw_dump(const volatile void *, lockop_printer_t);
static lwp_t    *rw_owner(wchan_t);

lockops_t rwlock_lockops = {
        .lo_name = "Reader / writer lock",
        .lo_type = LOCKOPS_SLEEP,
        .lo_dump = rw_dump,
};

syncobj_t rw_syncobj = {
        .sobj_flag      = SOBJ_SLEEPQ_SORTED,
        .sobj_unsleep   = turnstile_unsleep,
        .sobj_changepri = turnstile_changepri,
        .sobj_lendpri   = sleepq_lendpri,
        .sobj_owner     = rw_owner,
};

/*
 * rw_cas:
 *
 *      Do an atomic compare-and-swap on the lock word.
 */
static inline uintptr_t
rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

        return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
            (void *)o, (void *)n);
}
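
/*
 * The retry pattern built on rw_cas() and used throughout this file
 * (a sketch; "desired" stands for whatever new lock word the caller
 * wants): on failure the CAS returns the value actually found, so the
 * loop retries with that fresh value instead of re-reading:
 *
 *      for (owner = rw->rw_owner;;) {
 *              next = rw_cas(rw, owner, desired);
 *              if (next == owner)
 *                      break;          swap happened
 *              owner = next;           lost a race; recompute and retry
 *      }
 */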

/*
 * rw_swap:
 *
 *      Do an atomic swap of the lock word.  This is used only when it's
 *      known that the lock word is set up such that it can't be changed
 *      behind us (assert this), so there's no point considering the result.
 */
static inline void
rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

        n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
            (void *)n);

        RW_ASSERT(rw, n == o);
        RW_ASSERT(rw, (o & RW_HAS_WAITERS) != 0);
}

/*
 * rw_dump:
 *
 *      Dump the contents of a rwlock structure.
 */
static void
rw_dump(const volatile void *cookie, lockop_printer_t pr)
{
        const volatile krwlock_t *rw = cookie;

        pr("owner/count  : %#018lx flags    : %#018x\n",
            (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_abort:
 *
 *      Dump information about an error and panic the system.  This
 *      generates a lot of machine code in the DIAGNOSTIC case, so
 *      we ask the compiler to not inline it.
 */
static void __noinline
rw_abort(const char *func, size_t line, krwlock_t *rw, const char *msg)
{

        if (__predict_false(panicstr != NULL))
                return;

        LOCKDEBUG_ABORT(func, line, rw, &rwlock_lockops, msg);
}

/*
 * rw_init:
 *
 *      Initialize a rwlock for use.
 */
void
_rw_init(krwlock_t *rw, uintptr_t return_address)
{

#ifdef LOCKDEBUG
        /* XXX only because the assembly stubs can't handle RW_NODEBUG */
        if (LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address))
                rw->rw_owner = 0;
        else
                rw->rw_owner = RW_NODEBUG;
#else
        rw->rw_owner = 0;
#endif
}

void
rw_init(krwlock_t *rw)
{

        _rw_init(rw, (uintptr_t)__builtin_return_address(0));
}

/*
 * rw_destroy:
 *
 *      Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

        RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0);
        LOCKDEBUG_FREE((rw->rw_owner & RW_NODEBUG) == 0, rw);
}

/*
 * rw_oncpu:
 *
 *      Return true if an rwlock owner is running on a CPU in the system.
 *      If the target is waiting on the kernel big lock, then we must
 *      release it.  This is necessary to avoid deadlock.
 */
static bool
rw_oncpu(uintptr_t owner)
{
#ifdef MULTIPROCESSOR
        struct cpu_info *ci;
        lwp_t *l;

        KASSERT(kpreempt_disabled());

        if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
                return false;
        }

        /*
         * See lwp_dtor() for why dereferencing the LWP pointer is safe.
         * We must have kernel preemption disabled for that.
         */
        l = (lwp_t *)(owner & RW_THREAD);
        ci = l->l_cpu;

        if (ci && ci->ci_curlwp == l) {
                /* Target is running; do we need to block? */
                return (ci->ci_biglock_wanted != l);
        }
#endif
        /* Not running.  It may be safe to block now. */
        return false;
}

/*
 * rw_vector_enter:
 *
 *      Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
        uintptr_t owner, incr, need_wait, set_wait, curthread, next;
        turnstile_t *ts;
        int queue;
        lwp_t *l;
        LOCKSTAT_TIMER(slptime);
        LOCKSTAT_TIMER(slpcnt);
        LOCKSTAT_TIMER(spintime);
        LOCKSTAT_COUNTER(spincnt);
        LOCKSTAT_FLAG(lsflag);

        l = curlwp;
        curthread = (uintptr_t)l;

        RW_ASSERT(rw, !cpu_intr_p());
        RW_ASSERT(rw, curthread != 0);
        RW_WANTLOCK(rw, op);

        if (__predict_true(panicstr == NULL)) {
                KDASSERT(pserialize_not_in_read_section());
                LOCKDEBUG_BARRIER(&kernel_lock, 1);
        }

        /*
         * We play a slight trick here.  If we're a reader, we want to
         * increment the read count.  If we're a writer, we want to
         * set the owner field and the WRITE_LOCKED bit.
         *
         * In the latter case, we expect those bits to be zero, so we
         * can use an add operation to set them, which means a single
         * add serves both cases.
         */
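
        /*
         * Worked example (a sketch, assuming RW_READ_INCR is one count
         * at RW_READ_COUNT_SHIFT): an unheld lock word of 0 becomes
         * RW_READ_INCR after one reader enters and 2*RW_READ_INCR after
         * a second; for a writer the thread field is 0, so adding
         * (curthread | RW_WRITE_LOCKED) installs the owner pointer and
         * the write bit with the same add.
         */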
        if (__predict_true(op == RW_READER)) {
                incr = RW_READ_INCR;
                set_wait = RW_HAS_WAITERS;
                need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
                queue = TS_READER_Q;
        } else {
                RW_ASSERT(rw, op == RW_WRITER);
                incr = curthread | RW_WRITE_LOCKED;
                set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
                need_wait = RW_WRITE_LOCKED | RW_THREAD;
                queue = TS_WRITER_Q;
        }

        LOCKSTAT_ENTER(lsflag);

        KPREEMPT_DISABLE(curlwp);
        for (owner = rw->rw_owner;;) {
                /*
                 * Read the lock owner field.  If the need-to-wait
                 * indicator is clear, then try to acquire the lock.
                 */
                if ((owner & need_wait) == 0) {
                        next = rw_cas(rw, owner, (owner + incr) &
                            ~RW_WRITE_WANTED);
                        if (__predict_true(next == owner)) {
                                /* Got it! */
                                RW_MEMBAR_ACQUIRE();
                                break;
                        }

                        /*
                         * Didn't get it -- spin around again (we'll
                         * probably sleep on the next iteration).
                         */
                        owner = next;
                        continue;
                }
                if (__predict_false(RW_OWNER(rw) == curthread)) {
                        rw_abort(__func__, __LINE__, rw,
                            "locking against myself");
                }
                /*
                 * If the lock owner is running on another CPU, and
                 * there are no existing waiters, then spin.
                 */
                if (rw_oncpu(owner)) {
                        LOCKSTAT_START_TIMER(lsflag, spintime);
                        u_int count = SPINLOCK_BACKOFF_MIN;
                        do {
                                KPREEMPT_ENABLE(curlwp);
                                SPINLOCK_BACKOFF(count);
                                KPREEMPT_DISABLE(curlwp);
                                owner = rw->rw_owner;
                        } while (rw_oncpu(owner));
                        LOCKSTAT_STOP_TIMER(lsflag, spintime);
                        LOCKSTAT_COUNT(spincnt, 1);
                        if ((owner & need_wait) == 0)
                                continue;
                }

                /*
                 * Grab the turnstile chain lock.  Once we have that, we
                 * can adjust the waiter bits and sleep queue.
                 */
                ts = turnstile_lookup(rw);

                /*
                 * Mark the rwlock as having waiters.  If the set fails,
                 * then we may not need to sleep and should spin again.
                 * Reload rw_owner because turnstile_lookup() may have
                 * spun on the turnstile chain lock.
                 */
                owner = rw->rw_owner;
                if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
                        turnstile_exit(rw);
                        continue;
                }
                next = rw_cas(rw, owner, owner | set_wait);
                /* XXX membar? */
                if (__predict_false(next != owner)) {
                        turnstile_exit(rw);
                        owner = next;
                        continue;
                }

                LOCKSTAT_START_TIMER(lsflag, slptime);
                turnstile_block(ts, queue, rw, &rw_syncobj);
                LOCKSTAT_STOP_TIMER(lsflag, slptime);
                LOCKSTAT_COUNT(slpcnt, 1);

                /*
                 * No need for a memory barrier because of context switch.
                 * If not handed the lock, then spin again.
                 */
                if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
                        break;

                owner = rw->rw_owner;
        }
        KPREEMPT_ENABLE(curlwp);

        LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK |
            (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime,
            (l->l_rwcallsite != 0 ? l->l_rwcallsite :
              (uintptr_t)__builtin_return_address(0)));
        LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime,
            (l->l_rwcallsite != 0 ? l->l_rwcallsite :
              (uintptr_t)__builtin_return_address(0)));
        LOCKSTAT_EXIT(lsflag);

        RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
            (op == RW_READER && RW_COUNT(rw) != 0));
        RW_LOCKED(rw, op);
}

/*
 * rw_vector_exit:
 *
 *      Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw)
{
        uintptr_t curthread, owner, decr, newown, next;
        turnstile_t *ts;
        int rcnt, wcnt;
        lwp_t *l;

        l = curlwp;
        curthread = (uintptr_t)l;
        RW_ASSERT(rw, curthread != 0);

        /*
         * Again, we use a trick.  Since we used an add operation to
         * set the required lock bits, we can use a subtract to clear
         * them, which makes the read-release and write-release path
         * the same.
         */
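
        /*
         * Worked example (sketch): a reader subtracts RW_READ_INCR,
         * undoing the add from rw_vector_enter(); the writer subtracts
         * (curthread | RW_WRITE_LOCKED), clearing the owner field and
         * the write bit in one go while leaving the flag bits intact.
         */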
        owner = rw->rw_owner;
        if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
                RW_UNLOCKED(rw, RW_WRITER);
                RW_ASSERT(rw, RW_OWNER(rw) == curthread);
                decr = curthread | RW_WRITE_LOCKED;
        } else {
                RW_UNLOCKED(rw, RW_READER);
                RW_ASSERT(rw, RW_COUNT(rw) != 0);
                decr = RW_READ_INCR;
        }

        /*
         * Compute what we expect the new value of the lock to be. Only
         * proceed to do direct handoff if there are waiters, and if the
         * lock would become unowned.
         */
        RW_MEMBAR_RELEASE();
        for (;;) {
                newown = (owner - decr);
                if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
                        break;
                next = rw_cas(rw, owner, newown);
                if (__predict_true(next == owner))
                        return;
                owner = next;
        }

        /*
         * Grab the turnstile chain lock.  This gets the interlock
         * on the sleep queue.  Once we have that, we can adjust the
         * waiter bits.
         */
        ts = turnstile_lookup(rw);
        owner = rw->rw_owner;
        RW_ASSERT(rw, ts != NULL);
        RW_ASSERT(rw, (owner & RW_HAS_WAITERS) != 0);

        wcnt = TS_WAITERS(ts, TS_WRITER_Q);
        rcnt = TS_WAITERS(ts, TS_READER_Q);

        /*
         * Give the lock away.
         *
         * If we are releasing a write lock, then prefer to wake all
         * outstanding readers.  Otherwise, wake one writer if there
         * are outstanding readers, or all writers if there are no
         * pending readers.  If waking one specific writer, the writer
         * is handed the lock here.  If waking multiple writers, we
         * set WRITE_WANTED to block out new readers, and let them
         * do the work of acquiring the lock in rw_vector_enter().
         */
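
        /*
         * The cases, in brief (a sketch of the policy below):
         *
         *      releasing       also waiting            action
         *      write hold      readers                 wake all readers
         *      read hold       readers and writers     hand off to eldest writer
         *      either          writers only            wake all writers to compete
         */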
        if (rcnt == 0 || decr == RW_READ_INCR) {
                RW_ASSERT(rw, wcnt != 0);
                RW_ASSERT(rw, (owner & RW_WRITE_WANTED) != 0);

                if (rcnt != 0) {
                        /* Give the lock to the longest waiting writer. */
                        l = TS_FIRST(ts, TS_WRITER_Q);
                        newown = (uintptr_t)l | (owner & RW_NODEBUG);
                        newown |= RW_WRITE_LOCKED | RW_HAS_WAITERS;
                        if (wcnt > 1)
                                newown |= RW_WRITE_WANTED;
                        rw_swap(rw, owner, newown);
                        turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
                } else {
                        /* Wake all writers and let them fight it out. */
                        newown = owner & RW_NODEBUG;
                        newown |= RW_WRITE_WANTED;
                        rw_swap(rw, owner, newown);
                        turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
                }
        } else {
                RW_ASSERT(rw, rcnt != 0);

                /*
                 * Give the lock to all blocked readers.  If there
                 * is a writer waiting, new readers that arrive
                 * after the release will be blocked out.
                 */
                newown = owner & RW_NODEBUG;
                newown += rcnt << RW_READ_COUNT_SHIFT;
                if (wcnt != 0)
                        newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

                /* Wake up all sleeping readers. */
                rw_swap(rw, owner, newown);
                turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
        }
}

/*
 * rw_vector_tryenter:
 *
 *      Try to acquire a rwlock.
 */
int
rw_vector_tryenter(krwlock_t *rw, const krw_t op)
{
        uintptr_t curthread, owner, incr, need_wait, next;
        lwp_t *l;

        l = curlwp;
        curthread = (uintptr_t)l;

        RW_ASSERT(rw, curthread != 0);

        if (op == RW_READER) {
                incr = RW_READ_INCR;
                need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
        } else {
                RW_ASSERT(rw, op == RW_WRITER);
                incr = curthread | RW_WRITE_LOCKED;
                need_wait = RW_WRITE_LOCKED | RW_THREAD;
        }

        for (owner = rw->rw_owner;; owner = next) {
                if (__predict_false((owner & need_wait) != 0))
                        return 0;
                next = rw_cas(rw, owner, owner + incr);
                if (__predict_true(next == owner)) {
                        /* Got it! */
                        break;
                }
        }

        RW_WANTLOCK(rw, op);
        RW_LOCKED(rw, op);
        RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
            (op == RW_READER && RW_COUNT(rw) != 0));

        RW_MEMBAR_ACQUIRE();
        return 1;
}
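
/*
 * Illustrative use of the try path (a sketch): rw_tryenter() never
 * blocks, so it suits contexts that must not sleep:
 *
 *      if (rw_tryenter(&lock, RW_WRITER)) {
 *              ... modify the protected data ...
 *              rw_exit(&lock);
 *      } else {
 *              ... back off or defer the work ...
 *      }
 */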

/*
 * rw_downgrade:
 *
 *      Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
        uintptr_t owner, curthread, newown, next;
        turnstile_t *ts;
        int rcnt, wcnt;
        lwp_t *l;

        l = curlwp;
        curthread = (uintptr_t)l;
        RW_ASSERT(rw, curthread != 0);
        RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
        RW_ASSERT(rw, RW_OWNER(rw) == curthread);
        RW_UNLOCKED(rw, RW_WRITER);
#if !defined(DIAGNOSTIC)
        __USE(curthread);
#endif

        RW_MEMBAR_PRODUCER();

        for (owner = rw->rw_owner;; owner = next) {
                /*
                 * If there are no waiters we can do this the easy way.  Try
                 * swapping us down to one read hold.  If it fails, the lock
                 * condition has changed and we most likely now have
                 * waiters.
                 */
                if ((owner & RW_HAS_WAITERS) == 0) {
                        newown = (owner & RW_NODEBUG);
                        next = rw_cas(rw, owner, newown + RW_READ_INCR);
                        if (__predict_true(next == owner)) {
                                RW_LOCKED(rw, RW_READER);
                                RW_ASSERT(rw,
                                    (rw->rw_owner & RW_WRITE_LOCKED) == 0);
                                RW_ASSERT(rw, RW_COUNT(rw) != 0);
                                return;
                        }
                        continue;
                }

                /*
                 * Grab the turnstile chain lock.  This gets the interlock
                 * on the sleep queue.  Once we have that, we can adjust the
                 * waiter bits.
                 */
                ts = turnstile_lookup(rw);
                RW_ASSERT(rw, ts != NULL);

                rcnt = TS_WAITERS(ts, TS_READER_Q);
                wcnt = TS_WAITERS(ts, TS_WRITER_Q);

                if (rcnt == 0) {
                        /*
                         * If there are no readers, just preserve the
                         * waiters bits, swap us down to one read hold and
                         * return.
                         */
                        RW_ASSERT(rw, wcnt != 0);
                        RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
                        RW_ASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);

                        newown = owner & RW_NODEBUG;
                        newown |= RW_READ_INCR | RW_HAS_WAITERS |
                            RW_WRITE_WANTED;
                        next = rw_cas(rw, owner, newown);
                        turnstile_exit(rw);
                        if (__predict_true(next == owner))
                                break;
                } else {
                        /*
                         * Give the lock to all blocked readers, retaining
                         * one read hold for ourselves.  If there is a
                         * writer waiting, new readers will be blocked out.
                         */
                        newown = owner & RW_NODEBUG;
                        newown += (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
                        if (wcnt != 0)
                                newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

                        next = rw_cas(rw, owner, newown);
                        if (__predict_true(next == owner)) {
                                /* Wake up all sleeping readers. */
                                turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
                                break;
                        }
                        turnstile_exit(rw);
                }
        }

        RW_WANTLOCK(rw, RW_READER);
        RW_LOCKED(rw, RW_READER);
        RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
        RW_ASSERT(rw, RW_COUNT(rw) != 0);
}
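
/*
 * Worked example (sketch): with no waiters, rw_downgrade() moves the
 * owner word from (curthread | RW_WRITE_LOCKED) to RW_READ_INCR with
 * a single CAS, atomically converting the write hold into one read
 * hold.
 */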

/*
 * rw_tryupgrade:
 *
 *      Try to upgrade a read lock to a write lock.  We must be the only
 *      reader.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
        uintptr_t owner, curthread, newown, next;
        struct lwp *l;

        l = curlwp;
        curthread = (uintptr_t)l;
        RW_ASSERT(rw, curthread != 0);
        RW_ASSERT(rw, rw_read_held(rw));

        for (owner = RW_READ_INCR;; owner = next) {
                newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
                next = rw_cas(rw, owner, newown);
                if (__predict_true(next == owner)) {
                        RW_MEMBAR_PRODUCER();
                        break;
                }
                RW_ASSERT(rw, (next & RW_WRITE_LOCKED) == 0);
                if (__predict_false((next & RW_THREAD) != RW_READ_INCR)) {
                        RW_ASSERT(rw, (next & RW_THREAD) != 0);
                        return 0;
                }
        }

        RW_UNLOCKED(rw, RW_READER);
        RW_WANTLOCK(rw, RW_WRITER);
        RW_LOCKED(rw, RW_WRITER);
        RW_ASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
        RW_ASSERT(rw, RW_OWNER(rw) == curthread);

        return 1;
}
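
/*
 * Worked example (sketch): the upgrade can only succeed while the
 * thread field of the owner word is exactly RW_READ_INCR, i.e. we
 * are the sole reader; the CAS then installs
 * (curthread | RW_WRITE_LOCKED) while preserving the flag bits.
 */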

/*
 * rw_read_held:
 *
 *      Returns true if the rwlock is held for reading.  Must only be
 *      used for diagnostic assertions, and never be used to make
 *      decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
        uintptr_t owner;

        if (rw == NULL)
                return 0;
        owner = rw->rw_owner;
        return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *      Returns true if the rwlock is held for writing.  Must only be
 *      used for diagnostic assertions, and never be used to make
 *      decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

        if (rw == NULL)
                return 0;
        return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
            (RW_WRITE_LOCKED | (uintptr_t)curlwp);
}
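
/*
 * Illustrative use (sketch): the held predicates are intended for
 * assertions only, e.g.:
 *
 *      KASSERT(rw_write_held(&lock));
 */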

/*
 * rw_lock_held:
 *
 *      Returns true if the rwlock is held for reading or writing.  Must
 *      only be used for diagnostic assertions, and never be used to make
 *      decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

        if (rw == NULL)
                return 0;
        return (rw->rw_owner & RW_THREAD) != 0;
}

/*
 * rw_lock_op:
 *
 *      For a rwlock that is known to be held by the caller, return
 *      RW_READER or RW_WRITER to describe the hold type.
 */
krw_t
rw_lock_op(krwlock_t *rw)
{

        RW_ASSERT(rw, rw_lock_held(rw));

        return (rw->rw_owner & RW_WRITE_LOCKED) != 0 ? RW_WRITER : RW_READER;
}

/*
 * rw_owner:
 *
 *      Return the current owner of an RW lock, but only if it is write
 *      held.  Used for priority inheritance.
 */
static lwp_t *
rw_owner(wchan_t obj)
{
        krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
        uintptr_t owner = rw->rw_owner;

        if ((owner & RW_WRITE_LOCKED) == 0)
                return NULL;

        return (void *)(owner & RW_THREAD);
}

/*
 * rw_owner_running:
 *
 *      Return true if a RW lock is unheld, or write held and the owner is
 *      running on a CPU.  For the pagedaemon.
 */
bool
rw_owner_running(const krwlock_t *rw)
{
#ifdef MULTIPROCESSOR
        uintptr_t owner;
        bool rv;

        kpreempt_disable();
        owner = rw->rw_owner;
        rv = (owner & RW_THREAD) == 0 || rw_oncpu(owner);
        kpreempt_enable();
        return rv;
#else
        return rw_owner(rw) == curlwp;
#endif
}
