1 /* $OpenBSD: kern_rwlock.c,v 1.48 2022/05/10 16:56:16 bluhm Exp $ */
2
3 /*
4 * Copyright (c) 2002, 2003 Artur Grabowski <art@openbsd.org>
5 * Copyright (c) 2011 Thordur Bjornsson <thib@secnorth.net>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20 #include <sys/param.h>
21 #include <sys/systm.h>
22 #include <sys/pool.h>
23 #include <sys/proc.h>
24 #include <sys/rwlock.h>
25 #include <sys/limits.h>
26 #include <sys/atomic.h>
27 #include <sys/witness.h>
28
29 void rw_do_exit(struct rwlock *, unsigned long);
30
31 /* XXX - temporary measure until proc0 is properly aligned */
32 #define RW_PROC(p) (((long)p) & ~RWLOCK_MASK)
33
34 /*
35 * Other OSes implement more sophisticated mechanism to determine how long the
36 * process attempting to acquire the lock should be spinning. We start with
37 * the most simple approach: we do RW_SPINS attempts at most before eventually
38 * giving up and putting the process to sleep queue.
39 */
40 #define RW_SPINS 1000
41
42 #ifdef MULTIPROCESSOR
43 #define rw_cas(p, o, n) (atomic_cas_ulong(p, o, n) != o)
44 #else
static inline int
rw_cas(volatile unsigned long *p, unsigned long o, unsigned long n)
{
	/*
	 * Uniprocessor fallback: no other CPU can interleave, so a plain
	 * compare-and-set suffices.  Matches the MP macro's convention of
	 * returning non-zero on failure, zero on a successful swap.
	 */
	if (*p == o) {
		*p = n;
		return (0);
	}

	return (1);
}
54 #endif
55
56 /*
57 * Magic wand for lock operations. Every operation checks if certain
58 * flags are set and if they aren't, it increments the lock with some
59 * value (that might need some computing in a few cases). If the operation
60 * fails, we need to set certain flags while waiting for the lock.
61 *
62 * RW_WRITE The lock must be completely empty. We increment it with
63 * RWLOCK_WRLOCK and the proc pointer of the holder.
64 * Sets RWLOCK_WAIT|RWLOCK_WRWANT while waiting.
65 * RW_READ RWLOCK_WRLOCK|RWLOCK_WRWANT may not be set. We increment
66 * with RWLOCK_READ_INCR. RWLOCK_WAIT while waiting.
67 */
static const struct rwlock_op {
	unsigned long inc;	/* value added to the lock word on success */
	unsigned long check;	/* bits that must be clear before acquiring */
	unsigned long wait_set;	/* bits to set in the word while waiting */
	long proc_mult;		/* multiplier for RW_PROC(curproc) in inc */
	int wait_prio;		/* sleep priority while waiting */
} rw_ops[] = {
	{ /* RW_WRITE */
		RWLOCK_WRLOCK,
		ULONG_MAX,	/* any non-zero word blocks a writer */
		RWLOCK_WAIT | RWLOCK_WRWANT,
		1,		/* owner proc pointer is recorded in the word */
		PLOCK - 4	/* writers sleep at slightly higher priority */
	},
	{ /* RW_READ */
		RWLOCK_READ_INCR,
		RWLOCK_WRLOCK | RWLOCK_WRWANT,
		RWLOCK_WAIT,
		0,		/* readers do not record their identity */
		PLOCK
	},
	{ /* Sparse Entry. (no op uses index 2; RW_OPMASK values skip it) */
		0,
	},
	{ /* RW_DOWNGRADE */
		RWLOCK_READ_INCR - RWLOCK_WRLOCK,
		0,		/* always succeeds; holder owns the lock */
		0,
		-1,		/* remove the proc pointer from the word */
		PLOCK
	},
};
100
/*
 * Acquire a shared (read) lock.  Fast path: when no writer holds or
 * wants the lock, take a read reference with a single CAS.  On any
 * contention (or CAS failure) fall back to the slow path in rw_enter().
 */
void
rw_enter_read(struct rwlock *rwl)
{
	unsigned long owner = rwl->rwl_owner;

	if (__predict_false((owner & (RWLOCK_WRLOCK | RWLOCK_WRWANT)) ||
	    rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR)))
		rw_enter(rwl, RW_READ);
	else {
		/* Acquire barrier so critical section reads stay after CAS. */
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, 0);
	}
}
115
/*
 * Acquire an exclusive (write) lock.  Fast path: CAS the lock word from
 * completely empty (0) to the owner's proc pointer with RWLOCK_WRLOCK
 * set.  On failure fall back to the slow path in rw_enter().
 */
void
rw_enter_write(struct rwlock *rwl)
{
	struct proc *p = curproc;

	if (__predict_false(rw_cas(&rwl->rwl_owner, 0,
	    RW_PROC(p) | RWLOCK_WRLOCK)))
		rw_enter(rwl, RW_WRITE);
	else {
		/* Acquire barrier so critical section reads stay after CAS. */
		membar_enter_after_atomic();
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj,
		    LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
		WITNESS_LOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);
	}
}
131
/*
 * Release a shared (read) lock.  Fast path: drop one read reference with
 * a single CAS.  If a waiter is recorded (RWLOCK_WAIT) or the CAS races,
 * take the generic release path so waiters get woken.
 */
void
rw_exit_read(struct rwlock *rwl)
{
	unsigned long owner;

	rw_assert_rdlock(rwl);
	WITNESS_UNLOCK(&rwl->rwl_lock_obj, 0);

	/* Release barrier: critical section writes complete before unlock. */
	membar_exit_before_atomic();
	owner = rwl->rwl_owner;
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, owner - RWLOCK_READ_INCR)))
		rw_do_exit(rwl, 0);
}
146
/*
 * Release an exclusive (write) lock.  Fast path: CAS the lock word back
 * to zero.  If a waiter is recorded (RWLOCK_WAIT) or the CAS races, take
 * the generic release path so waiters get woken.
 */
void
rw_exit_write(struct rwlock *rwl)
{
	unsigned long owner;

	rw_assert_wrlock(rwl);
	WITNESS_UNLOCK(&rwl->rwl_lock_obj, LOP_EXCLUSIVE);

	/* Release barrier: critical section writes complete before unlock. */
	membar_exit_before_atomic();
	owner = rwl->rwl_owner;
	if (__predict_false((owner & RWLOCK_WAIT) ||
	    rw_cas(&rwl->rwl_owner, owner, 0)))
		rw_do_exit(rwl, RWLOCK_WRLOCK);
}
161
162 #ifdef DIAGNOSTIC
163 /*
164 * Put the diagnostic functions here to keep the main code free
165 * from ifdef clutter.
166 */
/*
 * Sanity-check a lock operation before sleeping on it; panics on
 * self-deadlock, downgrade of a lock not write-held, or an unknown op.
 */
static void
rw_enter_diag(struct rwlock *rwl, int flags)
{
	switch (flags & RW_OPMASK) {
	case RW_WRITE:
	case RW_READ:
		/* Sleeping on a lock we already own would deadlock forever. */
		if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s locking against myself",
			    rwl->rwl_name);
		break;
	case RW_DOWNGRADE:
		/*
		 * If we're downgrading, we must hold the write lock.
		 */
		if ((rwl->rwl_owner & RWLOCK_WRLOCK) == 0)
			panic("rw_enter: %s downgrade of non-write lock",
			    rwl->rwl_name);
		if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
			panic("rw_enter: %s downgrade, not holder",
			    rwl->rwl_name);
		break;

	default:
		panic("rw_enter: unknown op 0x%x", flags);
	}
}
193
194 #else
195 #define rw_enter_diag(r, f)
196 #endif
197
/*
 * Common initialization for plain and recursive rwlocks: clear the owner
 * word, record the name and, under WITNESS, register the lock object.
 */
static void
_rw_init_flags_witness(struct rwlock *rwl, const char *name, int lo_flags,
    const struct lock_type *type)
{
	rwl->rwl_owner = 0;
	rwl->rwl_name = name;

#ifdef WITNESS
	rwl->rwl_lock_obj.lo_flags = lo_flags;
	rwl->rwl_lock_obj.lo_name = name;
	rwl->rwl_lock_obj.lo_type = type;
	WITNESS_INIT(&rwl->rwl_lock_obj, type);
#else
	/* Silence unused-parameter warnings in non-WITNESS kernels. */
	(void)type;
	(void)lo_flags;
#endif
}
215
/*
 * Public rwlock initializer; translates rw_init() flags into witness
 * lock-object flags before doing the common setup.
 */
void
_rw_init_flags(struct rwlock *rwl, const char *name, int flags,
    const struct lock_type *type)
{
	_rw_init_flags_witness(rwl, name, RWLOCK_LO_FLAGS(flags), type);
}
222
/*
 * Slow-path lock acquisition for all operations (RW_READ, RW_WRITE,
 * RW_DOWNGRADE), driven by the rw_ops table.  Spins briefly on MP when a
 * writer holds the lock, then records wait bits and sleeps until the
 * lock word permits the operation.  Returns 0 on success, EBUSY for
 * RW_NOSLEEP, EAGAIN for RW_SLEEPFAIL, or a sleep error for RW_INTR.
 */
int
rw_enter(struct rwlock *rwl, int flags)
{
	const struct rwlock_op *op;
	struct sleep_state sls;
	unsigned long inc, o;
#ifdef MULTIPROCESSOR
	/*
	 * If process holds the kernel lock, then we want to give up on CPU
	 * as soon as possible so other processes waiting for the kernel lock
	 * can progress. Hence no spinning if we hold the kernel lock.
	 */
	unsigned int spin = (_kernel_lock_held()) ? 0 : RW_SPINS;
#endif
	int error, prio;
#ifdef WITNESS
	int lop_flags;

	lop_flags = LOP_NEWORDER;
	if (flags & RW_WRITE)
		lop_flags |= LOP_EXCLUSIVE;
	if (flags & RW_DUPOK)
		lop_flags |= LOP_DUPOK;
	if ((flags & RW_NOSLEEP) == 0 && (flags & RW_DOWNGRADE) == 0)
		WITNESS_CHECKORDER(&rwl->rwl_lock_obj, lop_flags, NULL);
#endif

	op = &rw_ops[(flags & RW_OPMASK) - 1];

	inc = op->inc + RW_PROC(curproc) * op->proc_mult;
retry:
	while (__predict_false(((o = rwl->rwl_owner) & op->check) != 0)) {
		unsigned long set = o | op->wait_set;
		int do_sleep;

		/* Avoid deadlocks after panic or in DDB */
		if (panicstr || db_active)
			return (0);

#ifdef MULTIPROCESSOR
		/*
		 * It makes sense to try to spin just in case the lock
		 * is acquired by writer.
		 */
		if ((o & RWLOCK_WRLOCK) && (spin != 0)) {
			spin--;
			CPU_BUSY_CYCLE();
			continue;
		}
#endif

		rw_enter_diag(rwl, flags);

		if (flags & RW_NOSLEEP)
			return (EBUSY);

		prio = op->wait_prio;
		if (flags & RW_INTR)
			prio |= PCATCH;
		sleep_setup(&sls, rwl, prio, rwl->rwl_name, 0);

		/*
		 * Only sleep if we managed to publish the wait bits; if the
		 * CAS lost a race the lock word changed and we must re-check.
		 */
		do_sleep = !rw_cas(&rwl->rwl_owner, o, set);

		error = sleep_finish(&sls, do_sleep);
		if ((flags & RW_INTR) &&
		    (error != 0))
			return (error);
		if (flags & RW_SLEEPFAIL)
			return (EAGAIN);
	}

	/* Checks passed; try to commit the increment, else start over. */
	if (__predict_false(rw_cas(&rwl->rwl_owner, o, o + inc)))
		goto retry;
	membar_enter_after_atomic();

	/*
	 * If old lock had RWLOCK_WAIT and RWLOCK_WRLOCK set, it means we
	 * downgraded a write lock and had possible read waiter, wake them
	 * to let them retry the lock.
	 */
	if (__predict_false((o & (RWLOCK_WRLOCK|RWLOCK_WAIT)) ==
	    (RWLOCK_WRLOCK|RWLOCK_WAIT)))
		wakeup(rwl);

	if (flags & RW_DOWNGRADE)
		WITNESS_DOWNGRADE(&rwl->rwl_lock_obj, lop_flags);
	else
		WITNESS_LOCK(&rwl->rwl_lock_obj, lop_flags);

	return (0);
}
314
/*
 * Generic release: determine from the RWLOCK_WRLOCK bit whether the lock
 * is held exclusively or shared, verify that, and hand off to the
 * common release routine.
 */
void
rw_exit(struct rwlock *rwl)
{
	unsigned long wrlock;

	/* Avoid deadlocks after panic or in DDB */
	if (panicstr || db_active)
		return;

	wrlock = rwl->rwl_owner & RWLOCK_WRLOCK;
	if (wrlock)
		rw_assert_wrlock(rwl);
	else
		rw_assert_rdlock(rwl);
	WITNESS_UNLOCK(&rwl->rwl_lock_obj, wrlock ? LOP_EXCLUSIVE : 0);

	/* Release barrier: critical section writes complete before unlock. */
	membar_exit_before_atomic();
	rw_do_exit(rwl, wrlock);
}
334
/* membar_exit_before_atomic() has to precede call of this function. */
/*
 * Common release slow path.  Loops a CAS to drop the lock: a write lock
 * is cleared to zero; a read lock drops one reference and also clears
 * the wait/wrwant bits so blocked threads retry.  Wakes sleepers if any
 * were recorded.
 */
void
rw_do_exit(struct rwlock *rwl, unsigned long wrlock)
{
	unsigned long owner, set;

	do {
		owner = rwl->rwl_owner;
		if (wrlock)
			set = 0;
		else
			set = (owner - RWLOCK_READ_INCR) &
				~(RWLOCK_WAIT|RWLOCK_WRWANT);
		/*
		 * Potential MP race here. If the owner had WRWANT set, we
		 * cleared it and a reader can sneak in before a writer.
		 */
	} while (__predict_false(rw_cas(&rwl->rwl_owner, owner, set)));

	if (owner & RWLOCK_WAIT)
		wakeup(rwl);
}
357
358 int
359 rw_status(struct rwlock *rwl)
360 {
361 unsigned long owner = rwl->rwl_owner;
362
363 if (owner & RWLOCK_WRLOCK) {
364 if (RW_PROC(curproc) == RW_PROC(owner))
365 return RW_WRITE;
366 else
367 return RW_WRITE_OTHER;
368 }
369 if (owner)
370 return RW_READ;
371 return (0);
372 }
373
374 #ifdef DIAGNOSTIC
/*
 * Panic unless the current process holds the write lock.  No-op after
 * panic or while in DDB to avoid recursive faults.
 */
void
rw_assert_wrlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_XLOCKED);
#else
	if (!(rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not held", rwl->rwl_name);

	if (RW_PROC(curproc) != RW_PROC(rwl->rwl_owner))
		panic("%s: lock not held by this process", rwl->rwl_name);
#endif
}
391
/*
 * Panic unless the lock is read-held (shared, not write-locked).
 * Readers are anonymous, so only the shared state can be checked, not
 * which process holds a reference.
 */
void
rw_assert_rdlock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_SLOCKED);
#else
	if (!RW_PROC(rwl->rwl_owner) || (rwl->rwl_owner & RWLOCK_WRLOCK))
		panic("%s: lock not shared", rwl->rwl_name);
#endif
}
405
/*
 * Panic unless the lock is held in some mode usable by the current
 * process: read-held, or write-held by this process.
 */
void
rw_assert_anylock(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_LOCKED);
#else
	switch (rw_status(rwl)) {
	case RW_WRITE_OTHER:
		panic("%s: lock held by different process", rwl->rwl_name);
	case 0:
		panic("%s: lock not held", rwl->rwl_name);
	}
#endif
}
423
/*
 * Panic if the current process holds the lock.  Without WITNESS only a
 * write hold can be attributed to a process, so anonymous read holds by
 * this process are not detected.
 */
void
rw_assert_unlocked(struct rwlock *rwl)
{
	if (panicstr || db_active)
		return;

#ifdef WITNESS
	witness_assert(&rwl->rwl_lock_obj, LA_UNLOCKED);
#else
	if (RW_PROC(curproc) == RW_PROC(rwl->rwl_owner))
		panic("%s: lock held", rwl->rwl_name);
#endif
}
437 #endif
438
/* Recursive rwlocks. */
/*
 * Initialize a recursive rwlock: zero the structure (including the
 * recursion count) and set up the embedded plain rwlock.
 */
void
_rrw_init_flags(struct rrwlock *rrwl, const char *name, int flags,
    const struct lock_type *type)
{
	memset(rrwl, 0, sizeof(struct rrwlock));
	_rw_init_flags_witness(&rrwl->rrwl_lock, name, RRWLOCK_LO_FLAGS(flags),
	    type);
}
448
/*
 * Acquire a recursive rwlock.  If the current process already owns the
 * embedded lock, bump the recursion count (or fail with EDEADLK under
 * RW_RECURSEFAIL); otherwise acquire the lock normally and start the
 * count at one.  Returns 0 on success or an rw_enter() error.
 */
int
rrw_enter(struct rrwlock *rrwl, int flags)
{
	int rv;

	if (RW_PROC(rrwl->rrwl_lock.rwl_owner) == RW_PROC(curproc)) {
		if (flags & RW_RECURSEFAIL)
			return (EDEADLK);
		else {
			rrwl->rrwl_wcnt++;
			WITNESS_LOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE);
			return (0);
		}
	}

	rv = rw_enter(&rrwl->rrwl_lock, flags);
	if (rv == 0)
		rrwl->rrwl_wcnt = 1;

	return (rv);
}
471
/*
 * Release a recursive rwlock.  If the current process owns it, drop one
 * recursion level; only when the count reaches zero (or the lock was
 * taken as a plain read lock by someone else's path) is the embedded
 * lock actually released.
 */
void
rrw_exit(struct rrwlock *rrwl)
{

	if (RW_PROC(rrwl->rrwl_lock.rwl_owner) == RW_PROC(curproc)) {
		KASSERT(rrwl->rrwl_wcnt > 0);
		rrwl->rrwl_wcnt--;
		if (rrwl->rrwl_wcnt != 0) {
			WITNESS_UNLOCK(&rrwl->rrwl_lock.rwl_lock_obj,
			    LOP_EXCLUSIVE);
			return;
		}
	}

	rw_exit(&rrwl->rrwl_lock);
}
488
/* Report the state of the embedded lock; see rw_status(). */
int
rrw_status(struct rrwlock *rrwl)
{
	return (rw_status(&rrwl->rrwl_lock));
}
494
495 /*-
496 * Copyright (c) 2008 The NetBSD Foundation, Inc.
497 * All rights reserved.
498 *
499 * This code is derived from software contributed to The NetBSD Foundation
500 * by Andrew Doran.
501 *
502 * Redistribution and use in source and binary forms, with or without
503 * modification, are permitted provided that the following conditions
504 * are met:
505 * 1. Redistributions of source code must retain the above copyright
506 * notice, this list of conditions and the following disclaimer.
507 * 2. Redistributions in binary form must reproduce the above copyright
508 * notice, this list of conditions and the following disclaimer in the
509 * documentation and/or other materials provided with the distribution.
510 *
511 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
512 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
513 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
514 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
515 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
516 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
517 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
518 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
519 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
520 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
521 * POSSIBILITY OF SUCH DAMAGE.
522 */
523
#define RWLOCK_OBJ_MAGIC	0x5aa3c85d	/* guards against bogus casts */
struct rwlock_obj {
	struct rwlock	ro_lock;	/* must be first: callers cast back */
	u_int		ro_magic;	/* RWLOCK_OBJ_MAGIC when valid */
	u_int		ro_refcnt;	/* references; freed when it hits 0 */
};
530
531
532 struct pool rwlock_obj_pool;
533
534 /*
535 * rw_obj_init:
536 *
537 * Initialize the mutex object store.
538 */
539 void
540 rw_obj_init(void)
541 {
542 pool_init(&rwlock_obj_pool, sizeof(struct rwlock_obj), 0, IPL_MPFLOOR,
543 PR_WAITOK, "rwobjpl", NULL);
544 }
545
546 /*
547 * rw_obj_alloc:
548 *
549 * Allocate a single lock object.
550 */
551 void
552 _rw_obj_alloc_flags(struct rwlock **lock, const char *name, int flags,
553 struct lock_type *type)
554 {
555 struct rwlock_obj *mo;
556
557 mo = pool_get(&rwlock_obj_pool, PR_WAITOK);
558 mo->ro_magic = RWLOCK_OBJ_MAGIC;
559 _rw_init_flags(&mo->ro_lock, name, flags, type);
560 mo->ro_refcnt = 1;
561
562 *lock = &mo->ro_lock;
563 }
564
565 /*
566 * rw_obj_hold:
567 *
568 * Add a single reference to a lock object. A reference to the object
569 * must already be held, and must be held across this call.
570 */
571
572 void
573 rw_obj_hold(struct rwlock *lock)
574 {
575 struct rwlock_obj *mo = (struct rwlock_obj *)lock;
576
577 KASSERTMSG(mo->ro_magic == RWLOCK_OBJ_MAGIC,
578 "%s: lock %p: mo->ro_magic (%#x) != RWLOCK_OBJ_MAGIC (%#x)",
579 __func__, mo, mo->ro_magic, RWLOCK_OBJ_MAGIC);
580 KASSERTMSG(mo->ro_refcnt > 0,
581 "%s: lock %p: mo->ro_refcnt (%#x) == 0",
582 __func__, mo, mo->ro_refcnt);
583
584 atomic_inc_int(&mo->ro_refcnt);
585 }
586
587 /*
588 * rw_obj_free:
589 *
590 * Drop a reference from a lock object. If the last reference is being
591 * dropped, free the object and return true. Otherwise, return false.
592 */
593 int
594 rw_obj_free(struct rwlock *lock)
595 {
596 struct rwlock_obj *mo = (struct rwlock_obj *)lock;
597
598 KASSERTMSG(mo->ro_magic == RWLOCK_OBJ_MAGIC,
599 "%s: lock %p: mo->ro_magic (%#x) != RWLOCK_OBJ_MAGIC (%#x)",
600 __func__, mo, mo->ro_magic, RWLOCK_OBJ_MAGIC);
601 KASSERTMSG(mo->ro_refcnt > 0,
602 "%s: lock %p: mo->ro_refcnt (%#x) == 0",
603 __func__, mo, mo->ro_refcnt);
604
605 if (atomic_dec_int_nv(&mo->ro_refcnt) > 0) {
606 return false;
607 }
608 #if notyet
609 WITNESS_DESTROY(&mo->ro_lock);
610 #endif
611 pool_put(&rwlock_obj_pool, mo);
612 return true;
613 }