1 /* $NetBSD: kern_rwlock.c,v 1.67 2023/01/27 09:28:41 ozaki-r Exp $ */
2
3 /*-
4 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020
5 * The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe and Andrew Doran.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Kernel reader/writer lock implementation, modeled after those
35 * found in Solaris, a description of which can be found in:
36 *
37 * Solaris Internals: Core Kernel Architecture, Jim Mauro and
38 * Richard McDougall.
39 *
40 * The NetBSD implementation differs from that described in the book, in
41 * that the locks are partially adaptive. Lock waiters spin wait while a
42 * lock is write held and the holder is still running on a CPU. The method
43 * of choosing which threads to awaken when a lock is released also differs,
44 * mainly to take account of the partially adaptive behaviour.
45 */
46
47 #include <sys/cdefs.h>
48 __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.67 2023/01/27 09:28:41 ozaki-r Exp $");
49
50 #include "opt_lockdebug.h"
51
52 #define __RWLOCK_PRIVATE
53
54 #include <sys/param.h>
55 #include <sys/proc.h>
56 #include <sys/rwlock.h>
57 #include <sys/sched.h>
58 #include <sys/sleepq.h>
59 #include <sys/systm.h>
60 #include <sys/lockdebug.h>
61 #include <sys/cpu.h>
62 #include <sys/atomic.h>
63 #include <sys/lock.h>
64 #include <sys/pserialize.h>
65
66 #include <dev/lockstat.h>
67
68 #include <machine/rwlock.h>
69
70 /*
71 * LOCKDEBUG
72 */
73
/*
 * RW_DEBUG_P:
 *
 *	True when the lock word does not carry RW_NODEBUG.  The result is
 *	passed as the first argument of the LOCKDEBUG hooks below.
 */
#define	RW_DEBUG_P(rw)		(((rw)->rw_owner & RW_NODEBUG) == 0)

/*
 * RW_WANTLOCK, RW_LOCKED, RW_UNLOCKED:
 *
 *	Wrappers around the LOCKDEBUG hooks.  Each passes the caller's
 *	return address and whether the operation is a read hold
 *	(op == RW_READER).
 *
 *	The `op' argument is parenthesized so that any expression can be
 *	passed safely, and the expansions do not end in a semicolon: the
 *	invocation site supplies it, so each use is exactly one statement.
 */
#define	RW_WANTLOCK(rw, op)						\
	LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw),			\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_LOCKED(rw, op)						\
	LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL,			\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_UNLOCKED(rw, op)						\
	LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw),			\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
85
86 /*
87 * DIAGNOSTIC
88 */
89
/*
 * RW_ASSERT:
 *
 *	With DIAGNOSTIC, evaluate `expr' and call rw_abort() with the
 *	stringified expression when it is false.  Without DIAGNOSTIC the
 *	macro expands to nothing, so `expr' is not evaluated at all.
 */
#ifndef DIAGNOSTIC
#define	RW_ASSERT(lock, expr)	/* nothing */
#else	/* DIAGNOSTIC */
#define	RW_ASSERT(lock, expr)						\
do {									\
	if (__predict_false(!(expr)))					\
		rw_abort(__func__, __LINE__, lock,			\
		    "assertion failed: " #expr);			\
} while (/* CONSTCOND */ 0)
#endif	/* !DIAGNOSTIC */
99
100 /*
101 * Memory barriers.
102 */
/*
 * When __HAVE_ATOMIC_AS_MEMBAR is defined the barrier macros expand to
 * nothing; otherwise each maps onto the membar_*() call of the same
 * name.
 */
#ifndef __HAVE_ATOMIC_AS_MEMBAR
#define	RW_MEMBAR_ACQUIRE()	membar_acquire()
#define	RW_MEMBAR_RELEASE()	membar_release()
#define	RW_MEMBAR_PRODUCER()	membar_producer()
#else	/* __HAVE_ATOMIC_AS_MEMBAR */
#define	RW_MEMBAR_ACQUIRE()
#define	RW_MEMBAR_RELEASE()
#define	RW_MEMBAR_PRODUCER()
#endif	/* !__HAVE_ATOMIC_AS_MEMBAR */
112
113 /*
114 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
115 */
/* LOCKDEBUG builds never use the machine-dependent stubs. */
#if defined(LOCKDEBUG)
#undef	__HAVE_RW_STUBS
#endif

/*
 * Without stubs, the public entry points are strong aliases of the
 * rw_vector_*() functions defined in this file.
 */
#if !defined(__HAVE_RW_STUBS)
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
__strong_alias(rw_tryenter,rw_vector_tryenter);
#endif	/* !__HAVE_RW_STUBS */
125
126 static void rw_abort(const char *, size_t, krwlock_t *, const char *);
127 static void rw_dump(const volatile void *, lockop_printer_t);
128 static lwp_t *rw_owner(wchan_t);
129
130 lockops_t rwlock_lockops = {
131 .lo_name = "Reader / writer lock",
132 .lo_type = LOCKOPS_SLEEP,
133 .lo_dump = rw_dump,
134 };
135
136 syncobj_t rw_syncobj = {
137 .sobj_flag = SOBJ_SLEEPQ_SORTED,
138 .sobj_unsleep = turnstile_unsleep,
139 .sobj_changepri = turnstile_changepri,
140 .sobj_lendpri = sleepq_lendpri,
141 .sobj_owner = rw_owner,
142 };
143
144 /*
145 * rw_cas:
146 *
147 * Do an atomic compare-and-swap on the lock word.
148 */
149 static inline uintptr_t
150 rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
151 {
152
153 return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
154 (void *)o, (void *)n);
155 }
156
157 /*
158 * rw_swap:
159 *
160 * Do an atomic swap of the lock word. This is used only when it's
161 * known that the lock word is set up such that it can't be changed
162 * behind us (assert this), so there's no point considering the result.
163 */
164 static inline void
165 rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
166 {
167
168 n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
169 (void *)n);
170
171 RW_ASSERT(rw, n == o);
172 RW_ASSERT(rw, (o & RW_HAS_WAITERS) != 0);
173 }
174
175 /*
176 * rw_dump:
177 *
178 * Dump the contents of a rwlock structure.
179 */
180 static void
181 rw_dump(const volatile void *cookie, lockop_printer_t pr)
182 {
183 const volatile krwlock_t *rw = cookie;
184
185 pr("owner/count : %#018lx flags : %#018x\n",
186 (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
187 }
188
189 /*
190 * rw_abort:
191 *
192 * Dump information about an error and panic the system. This
193 * generates a lot of machine code in the DIAGNOSTIC case, so
194 * we ask the compiler to not inline it.
195 */
196 static void __noinline
197 rw_abort(const char *func, size_t line, krwlock_t *rw, const char *msg)
198 {
199
200 if (__predict_false(panicstr != NULL))
201 return;
202
203 LOCKDEBUG_ABORT(func, line, rw, &rwlock_lockops, msg);
204 }
205
206 /*
207 * rw_init:
208 *
209 * Initialize a rwlock for use.
210 */
211 void
212 _rw_init(krwlock_t *rw, uintptr_t return_address)
213 {
214
215 #ifdef LOCKDEBUG
216 /* XXX only because the assembly stubs can't handle RW_NODEBUG */
217 if (LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address))
218 rw->rw_owner = 0;
219 else
220 rw->rw_owner = RW_NODEBUG;
221 #else
222 rw->rw_owner = 0;
223 #endif
224 }
225
226 void
227 rw_init(krwlock_t *rw)
228 {
229
230 _rw_init(rw, (uintptr_t)__builtin_return_address(0));
231 }
232
233 /*
234 * rw_destroy:
235 *
236 * Tear down a rwlock.
237 */
238 void
239 rw_destroy(krwlock_t *rw)
240 {
241
242 RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0);
243 LOCKDEBUG_FREE((rw->rw_owner & RW_NODEBUG) == 0, rw);
244 }
245
/*
 * rw_oncpu:
 *
 *	Decide whether a would-be acquirer should keep spinning on the
 *	lock word value `owner'.  Returns true only when all of these
 *	hold (MULTIPROCESSOR kernels only):
 *
 *	- the lock is write held and has no recorded waiters;
 *	- the holding LWP is the one currently running on its CPU;
 *	- that CPU does not record the holder as wanting the kernel big
 *	  lock (the original note says blocking is required in that case
 *	  to avoid deadlock).
 *
 *	In every other case, and always on uniprocessor kernels, returns
 *	false: it may be safe to block.  The caller must have kernel
 *	preemption disabled.
 */
static bool
rw_oncpu(uintptr_t owner)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *cpu;
	lwp_t *holder;

	KASSERT(kpreempt_disabled());

	/* Spin only on a write-held lock that nobody is sleeping on. */
	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED)
		return false;

	/*
	 * See lwp_dtor() why dereference of the LWP pointer is safe.
	 * We must have kernel preemption disabled for that.
	 */
	holder = (lwp_t *)(owner & RW_THREAD);
	cpu = holder->l_cpu;

	/* Holder is not the LWP running on its CPU: no point spinning. */
	if (cpu == NULL || cpu->ci_curlwp != holder)
		return false;

	/* Holder is running; spin unless it is after the big lock. */
	return (cpu->ci_biglock_wanted != holder);
#else
	return false;
#endif
}
281
282 /*
283 * rw_vector_enter:
284 *
285 * Acquire a rwlock.
286 */
/*
 * Both hold types share one acquisition loop, parameterized by four
 * values derived from `op':
 *
 *	incr		amount added to the lock word to take the hold
 *	need_wait	lock word bits that, if set, mean we must wait
 *	set_wait	bits set in the lock word before going to sleep
 *	queue		turnstile queue to sleep on
 *
 * Kernel preemption is disabled around the loop and re-enabled only
 * for each backoff step while spinning.  Must not be called from
 * interrupt context (asserted).
 */
287 void
288 rw_vector_enter(krwlock_t *rw, const krw_t op)
289 {
290 uintptr_t owner, incr, need_wait, set_wait, curthread, next;
291 turnstile_t *ts;
292 int queue;
293 lwp_t *l;
294 LOCKSTAT_TIMER(slptime);
295 LOCKSTAT_TIMER(slpcnt);
296 LOCKSTAT_TIMER(spintime);
297 LOCKSTAT_COUNTER(spincnt);
298 LOCKSTAT_FLAG(lsflag);
299
300 l = curlwp;
301 curthread = (uintptr_t)l;
302
303 RW_ASSERT(rw, !cpu_intr_p());
304 RW_ASSERT(rw, curthread != 0);
305 RW_WANTLOCK(rw, op);
306
/* These context checks are skipped once a panic is in progress. */
307 if (__predict_true(panicstr == NULL)) {
308 KDASSERT(pserialize_not_in_read_section());
309 LOCKDEBUG_BARRIER(&kernel_lock, 1);
310 }
311
312 /*
313 * We play a slight trick here. If we're a reader, we want to
314 * increment the read count. If we're a writer, we want to
315 * set the owner field and the WRITE_LOCKED bit.
316 *
317 * In the latter case, we expect those bits to be zero,
318 * therefore we can use an add operation to set them, which
319 * means an add operation for both cases.
320 */
321 if (__predict_true(op == RW_READER)) {
322 incr = RW_READ_INCR;
323 set_wait = RW_HAS_WAITERS;
324 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
325 queue = TS_READER_Q;
326 } else {
327 RW_ASSERT(rw, op == RW_WRITER);
328 incr = curthread | RW_WRITE_LOCKED;
329 set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
330 need_wait = RW_WRITE_LOCKED | RW_THREAD;
331 queue = TS_WRITER_Q;
332 }
333
334 LOCKSTAT_ENTER(lsflag);
335
/* rw_oncpu() asserts that kernel preemption is disabled. */
336 KPREEMPT_DISABLE(curlwp);
337 for (owner = rw->rw_owner;;) {
338 /*
339 * Read the lock owner field. If the need-to-wait
340 * indicator is clear, then try to acquire the lock.
341 */
342 if ((owner & need_wait) == 0) {
/*
 * Masking out RW_WRITE_WANTED only has an effect for a writer: a
 * reader cannot reach this point with the bit set, because the bit
 * is part of the reader's need_wait mask.
 */
343 next = rw_cas(rw, owner, (owner + incr) &
344 ~RW_WRITE_WANTED);
345 if (__predict_true(next == owner)) {
346 /* Got it! */
347 RW_MEMBAR_ACQUIRE();
348 break;
349 }
350
351 /*
352 * Didn't get it -- spin around again (we'll
353 * probably sleep on the next iteration).
354 */
355 owner = next;
356 continue;
357 }
/* Diagnose recursion: the owner field already names this LWP. */
358 if (__predict_false(RW_OWNER(rw) == curthread)) {
359 rw_abort(__func__, __LINE__, rw,
360 "locking against myself");
361 }
362 /*
363 * If the lock owner is running on another CPU, and
364 * there are no existing waiters, then spin.
365 */
366 if (rw_oncpu(owner)) {
367 LOCKSTAT_START_TIMER(lsflag, spintime);
368 u_int count = SPINLOCK_BACKOFF_MIN;
/*
 * Preemption is allowed during each backoff step only; the lock
 * word is re-read with preemption disabled again before it is
 * handed to rw_oncpu().
 */
369 do {
370 KPREEMPT_ENABLE(curlwp);
371 SPINLOCK_BACKOFF(count);
372 KPREEMPT_DISABLE(curlwp);
373 owner = rw->rw_owner;
374 } while (rw_oncpu(owner));
375 LOCKSTAT_STOP_TIMER(lsflag, spintime);
376 LOCKSTAT_COUNT(spincnt, 1);
/* Spinning ended; if the lock became available, retry the CAS. */
377 if ((owner & need_wait) == 0)
378 continue;
379 }
380
381 /*
382 * Grab the turnstile chain lock. Once we have that, we
383 * can adjust the waiter bits and sleep queue.
384 */
385 ts = turnstile_lookup(rw);
386
387 /*
388 * Mark the rwlock as having waiters. If the set fails,
389 * then we may not need to sleep and should spin again.
390 * Reload rw_owner because turnstile_lookup() may have
391 * spun on the turnstile chain lock.
392 */
393 owner = rw->rw_owner;
394 if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
395 turnstile_exit(rw);
396 continue;
397 }
/*
 * Publish the intent to sleep.  A failed CAS means the lock word
 * changed under us: drop the chain lock and re-evaluate from the
 * value that was found.
 */
398 next = rw_cas(rw, owner, owner | set_wait);
399 /* XXX membar? */
400 if (__predict_false(next != owner)) {
401 turnstile_exit(rw);
402 owner = next;
403 continue;
404 }
405
/*
 * Sleep on the chosen queue.  No turnstile_exit() is issued on this
 * path; presumably turnstile_block() releases the chain lock --
 * confirm against the turnstile implementation.
 */
406 LOCKSTAT_START_TIMER(lsflag, slptime);
407 turnstile_block(ts, queue, rw, &rw_syncobj);
408 LOCKSTAT_STOP_TIMER(lsflag, slptime);
409 LOCKSTAT_COUNT(slpcnt, 1);
410
411 /*
412 * No need for a memory barrier because of context switch.
413 * If not handed the lock, then spin again.
414 */
/*
 * A woken reader always leaves the loop: rw_vector_exit() and
 * rw_downgrade() add the readers they wake to the hold count before
 * waking them.  A woken writer leaves only if the lock word already
 * names it as owner (direct handoff); otherwise it competes again.
 */
415 if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
416 break;
417
418 owner = rw->rw_owner;
419 }
420 KPREEMPT_ENABLE(curlwp);
421
/*
 * Record sleep and spin statistics.  The call site saved in
 * l_rwcallsite is preferred when non-zero; otherwise our own return
 * address is used.
 */
422 LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK |
423 (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime,
424 (l->l_rwcallsite != 0 ? l->l_rwcallsite :
425 (uintptr_t)__builtin_return_address(0)));
426 LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime,
427 (l->l_rwcallsite != 0 ? l->l_rwcallsite :
428 (uintptr_t)__builtin_return_address(0)));
429 LOCKSTAT_EXIT(lsflag);
430
/* Post-condition: we own it as writer, or the read count is non-zero. */
431 RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
432 (op == RW_READER && RW_COUNT(rw) != 0));
433 RW_LOCKED(rw, op);
434 }
435
436 /*
437 * rw_vector_exit:
438 *
439 * Release a rwlock.
440 */
441 void
442 rw_vector_exit(krwlock_t *rw)
443 {
444 uintptr_t curthread, owner, decr, newown, next;
445 turnstile_t *ts;
446 int rcnt, wcnt;
447 lwp_t *l;
448
449 l = curlwp;
450 curthread = (uintptr_t)l;
451 RW_ASSERT(rw, curthread != 0);
452
453 /*
454 * Again, we use a trick. Since we used an add operation to
455 * set the required lock bits, we can use a subtract to clear
456 * them, which makes the read-release and write-release path
457 * the same.
458 */
459 owner = rw->rw_owner;
460 if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
461 RW_UNLOCKED(rw, RW_WRITER);
462 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
463 decr = curthread | RW_WRITE_LOCKED;
464 } else {
465 RW_UNLOCKED(rw, RW_READER);
466 RW_ASSERT(rw, RW_COUNT(rw) != 0);
467 decr = RW_READ_INCR;
468 }
469
470 /*
471 * Compute what we expect the new value of the lock to be. Only
472 * proceed to do direct handoff if there are waiters, and if the
473 * lock would become unowned.
474 */
475 RW_MEMBAR_RELEASE();
476 for (;;) {
477 newown = (owner - decr);
478 if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
479 break;
480 next = rw_cas(rw, owner, newown);
481 if (__predict_true(next == owner))
482 return;
483 owner = next;
484 }
485
486 /*
487 * Grab the turnstile chain lock. This gets the interlock
488 * on the sleep queue. Once we have that, we can adjust the
489 * waiter bits.
490 */
491 ts = turnstile_lookup(rw);
492 owner = rw->rw_owner;
493 RW_ASSERT(rw, ts != NULL);
494 RW_ASSERT(rw, (owner & RW_HAS_WAITERS) != 0);
495
496 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
497 rcnt = TS_WAITERS(ts, TS_READER_Q);
498
499 /*
500 * Give the lock away.
501 *
502 * If we are releasing a write lock, then prefer to wake all
503 * outstanding readers. Otherwise, wake one writer if there
504 * are outstanding readers, or all writers if there are no
505 * pending readers. If waking one specific writer, the writer
506 * is handed the lock here. If waking multiple writers, we
507 * set WRITE_WANTED to block out new readers, and let them
508 * do the work of acquiring the lock in rw_vector_enter().
509 */
510 if (rcnt == 0 || decr == RW_READ_INCR) {
511 RW_ASSERT(rw, wcnt != 0);
512 RW_ASSERT(rw, (owner & RW_WRITE_WANTED) != 0);
513
514 if (rcnt != 0) {
515 /* Give the lock to the longest waiting writer. */
516 l = TS_FIRST(ts, TS_WRITER_Q);
517 newown = (uintptr_t)l | (owner & RW_NODEBUG);
518 newown |= RW_WRITE_LOCKED | RW_HAS_WAITERS;
519 if (wcnt > 1)
520 newown |= RW_WRITE_WANTED;
521 rw_swap(rw, owner, newown);
522 turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
523 } else {
524 /* Wake all writers and let them fight it out. */
525 newown = owner & RW_NODEBUG;
526 newown |= RW_WRITE_WANTED;
527 rw_swap(rw, owner, newown);
528 turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
529 }
530 } else {
531 RW_ASSERT(rw, rcnt != 0);
532
533 /*
534 * Give the lock to all blocked readers. If there
535 * is a writer waiting, new readers that arrive
536 * after the release will be blocked out.
537 */
538 newown = owner & RW_NODEBUG;
539 newown += rcnt << RW_READ_COUNT_SHIFT;
540 if (wcnt != 0)
541 newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;
542
543 /* Wake up all sleeping readers. */
544 rw_swap(rw, owner, newown);
545 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
546 }
547 }
548
549 /*
550 * rw_vector_tryenter:
551 *
552 * Try to acquire a rwlock.
553 */
554 int
555 rw_vector_tryenter(krwlock_t *rw, const krw_t op)
556 {
557 uintptr_t curthread, owner, incr, need_wait, next;
558 lwp_t *l;
559
560 l = curlwp;
561 curthread = (uintptr_t)l;
562
563 RW_ASSERT(rw, curthread != 0);
564
565 if (op == RW_READER) {
566 incr = RW_READ_INCR;
567 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
568 } else {
569 RW_ASSERT(rw, op == RW_WRITER);
570 incr = curthread | RW_WRITE_LOCKED;
571 need_wait = RW_WRITE_LOCKED | RW_THREAD;
572 }
573
574 for (owner = rw->rw_owner;; owner = next) {
575 if (__predict_false((owner & need_wait) != 0))
576 return 0;
577 next = rw_cas(rw, owner, owner + incr);
578 if (__predict_true(next == owner)) {
579 /* Got it! */
580 break;
581 }
582 }
583
584 RW_WANTLOCK(rw, op);
585 RW_LOCKED(rw, op);
586 RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
587 (op == RW_READER && RW_COUNT(rw) != 0));
588
589 RW_MEMBAR_ACQUIRE();
590 return 1;
591 }
592
593 /*
594 * rw_downgrade:
595 *
596 * Downgrade a write lock to a read lock.
597 */
598 void
599 rw_downgrade(krwlock_t *rw)
600 {
601 uintptr_t owner, curthread, newown, next;
602 turnstile_t *ts;
603 int rcnt, wcnt;
604 lwp_t *l;
605
606 l = curlwp;
607 curthread = (uintptr_t)l;
608 RW_ASSERT(rw, curthread != 0);
609 RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
610 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
611 RW_UNLOCKED(rw, RW_WRITER);
612 #if !defined(DIAGNOSTIC)
613 __USE(curthread);
614 #endif
615
616 RW_MEMBAR_PRODUCER();
617
618 for (owner = rw->rw_owner;; owner = next) {
619 /*
620 * If there are no waiters we can do this the easy way. Try
621 * swapping us down to one read hold. If it fails, the lock
622 * condition has changed and we most likely now have
623 * waiters.
624 */
625 if ((owner & RW_HAS_WAITERS) == 0) {
626 newown = (owner & RW_NODEBUG);
627 next = rw_cas(rw, owner, newown + RW_READ_INCR);
628 if (__predict_true(next == owner)) {
629 RW_LOCKED(rw, RW_READER);
630 RW_ASSERT(rw,
631 (rw->rw_owner & RW_WRITE_LOCKED) == 0);
632 RW_ASSERT(rw, RW_COUNT(rw) != 0);
633 return;
634 }
635 continue;
636 }
637
638 /*
639 * Grab the turnstile chain lock. This gets the interlock
640 * on the sleep queue. Once we have that, we can adjust the
641 * waiter bits.
642 */
643 ts = turnstile_lookup(rw);
644 RW_ASSERT(rw, ts != NULL);
645
646 rcnt = TS_WAITERS(ts, TS_READER_Q);
647 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
648
649 if (rcnt == 0) {
650 /*
651 * If there are no readers, just preserve the
652 * waiters bits, swap us down to one read hold and
653 * return.
654 */
655 RW_ASSERT(rw, wcnt != 0);
656 RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
657 RW_ASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);
658
659 newown = owner & RW_NODEBUG;
660 newown |= RW_READ_INCR | RW_HAS_WAITERS |
661 RW_WRITE_WANTED;
662 next = rw_cas(rw, owner, newown);
663 turnstile_exit(rw);
664 if (__predict_true(next == owner))
665 break;
666 } else {
667 /*
668 * Give the lock to all blocked readers. We may
669 * retain one read hold if downgrading. If there is
670 * a writer waiting, new readers will be blocked
671 * out.
672 */
673 newown = owner & RW_NODEBUG;
674 newown += (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
675 if (wcnt != 0)
676 newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;
677
678 next = rw_cas(rw, owner, newown);
679 if (__predict_true(next == owner)) {
680 /* Wake up all sleeping readers. */
681 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
682 break;
683 }
684 turnstile_exit(rw);
685 }
686 }
687
688 RW_WANTLOCK(rw, RW_READER);
689 RW_LOCKED(rw, RW_READER);
690 RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
691 RW_ASSERT(rw, RW_COUNT(rw) != 0);
692 }
693
694 /*
695 * rw_tryupgrade:
696 *
697 * Try to upgrade a read lock to a write lock. We must be the only
698 * reader.
699 */
700 int
701 rw_tryupgrade(krwlock_t *rw)
702 {
703 uintptr_t owner, curthread, newown, next;
704 struct lwp *l;
705
706 l = curlwp;
707 curthread = (uintptr_t)l;
708 RW_ASSERT(rw, curthread != 0);
709 RW_ASSERT(rw, rw_read_held(rw));
710
711 for (owner = RW_READ_INCR;; owner = next) {
712 newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
713 next = rw_cas(rw, owner, newown);
714 if (__predict_true(next == owner)) {
715 RW_MEMBAR_PRODUCER();
716 break;
717 }
718 RW_ASSERT(rw, (next & RW_WRITE_LOCKED) == 0);
719 if (__predict_false((next & RW_THREAD) != RW_READ_INCR)) {
720 RW_ASSERT(rw, (next & RW_THREAD) != 0);
721 return 0;
722 }
723 }
724
725 RW_UNLOCKED(rw, RW_READER);
726 RW_WANTLOCK(rw, RW_WRITER);
727 RW_LOCKED(rw, RW_WRITER);
728 RW_ASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
729 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
730
731 return 1;
732 }
733
734 /*
735 * rw_read_held:
736 *
737 * Returns true if the rwlock is held for reading. Must only be
738 * used for diagnostic assertions, and never be used to make
739 * decisions about how to use a rwlock.
740 */
741 int
742 rw_read_held(krwlock_t *rw)
743 {
744 uintptr_t owner;
745
746 if (rw == NULL)
747 return 0;
748 owner = rw->rw_owner;
749 return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
750 }
751
752 /*
753 * rw_write_held:
754 *
755 * Returns true if the rwlock is held for writing. Must only be
756 * used for diagnostic assertions, and never be used to make
757 * decisions about how to use a rwlock.
758 */
759 int
760 rw_write_held(krwlock_t *rw)
761 {
762
763 if (rw == NULL)
764 return 0;
765 return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
766 (RW_WRITE_LOCKED | (uintptr_t)curlwp);
767 }
768
769 /*
770 * rw_lock_held:
771 *
772 * Returns true if the rwlock is held for reading or writing. Must
773 * only be used for diagnostic assertions, and never be used to make
774 * decisions about how to use a rwlock.
775 */
776 int
777 rw_lock_held(krwlock_t *rw)
778 {
779
780 if (rw == NULL)
781 return 0;
782 return (rw->rw_owner & RW_THREAD) != 0;
783 }
784
785 /*
786 * rw_lock_op:
787 *
788 * For a rwlock that is known to be held by the caller, return
789 * RW_READER or RW_WRITER to describe the hold type.
790 */
791 krw_t
792 rw_lock_op(krwlock_t *rw)
793 {
794
795 RW_ASSERT(rw, rw_lock_held(rw));
796
797 return (rw->rw_owner & RW_WRITE_LOCKED) != 0 ? RW_WRITER : RW_READER;
798 }
799
800 /*
801 * rw_owner:
802 *
803 * Return the current owner of an RW lock, but only if it is write
804 * held. Used for priority inheritance.
805 */
806 static lwp_t *
807 rw_owner(wchan_t obj)
808 {
809 krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
810 uintptr_t owner = rw->rw_owner;
811
812 if ((owner & RW_WRITE_LOCKED) == 0)
813 return NULL;
814
815 return (void *)(owner & RW_THREAD);
816 }
817
818 /*
819 * rw_owner_running:
820 *
821 * Return true if a RW lock is unheld, or write held and the owner is
822 * running on a CPU. For the pagedaemon.
823 */
824 bool
825 rw_owner_running(const krwlock_t *rw)
826 {
827 #ifdef MULTIPROCESSOR
828 uintptr_t owner;
829 bool rv;
830
831 kpreempt_disable();
832 owner = rw->rw_owner;
833 rv = (owner & RW_THREAD) == 0 || rw_oncpu(owner);
834 kpreempt_enable();
835 return rv;
836 #else
837 return rw_owner(rw) == curlwp;
838 #endif
839 }
Cache object: 8e0e328df73a43325906a55e521c59a5
|