FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_umtx.c
1 /*-
2 * Copyright (c) 2004, David Xu <davidxu@freebsd.org>
3 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice unmodified, this list of conditions, and the following
11 * disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include "opt_compat.h"
32 #include <sys/param.h>
33 #include <sys/kernel.h>
34 #include <sys/limits.h>
35 #include <sys/lock.h>
36 #include <sys/malloc.h>
37 #include <sys/mutex.h>
38 #include <sys/priv.h>
39 #include <sys/proc.h>
40 #include <sys/sched.h>
41 #include <sys/smp.h>
42 #include <sys/sysctl.h>
43 #include <sys/sysent.h>
44 #include <sys/systm.h>
45 #include <sys/sysproto.h>
46 #include <sys/eventhandler.h>
47 #include <sys/umtx.h>
48
49 #include <vm/vm.h>
50 #include <vm/vm_param.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_object.h>
54
55 #include <machine/cpu.h>
56
57 #ifdef COMPAT_IA32
58 #include <compat/freebsd32/freebsd32_proto.h>
59 #endif
60
61 #define _UMUTEX_TRY 1
62 #define _UMUTEX_WAIT 2
63
64 /* Priority inheritance mutex info. */
65 struct umtx_pi {
66 /* Owner thread */
67 struct thread *pi_owner;
68
69 /* Reference count */
70 int pi_refcount;
71
72 /* List entry to link PI mutexes held by a thread */
73 TAILQ_ENTRY(umtx_pi) pi_link;
74
75 /* List entry in hash */
76 TAILQ_ENTRY(umtx_pi) pi_hashlink;
77
78 /* List for waiters */
79 TAILQ_HEAD(,umtx_q) pi_blocked;
80
81 /* Identify a userland lock object */
82 struct umtx_key pi_key;
83 };
84
85 /* A user of a userland synchronization object. */
86 struct umtx_q {
87 /* Linked list for the hash. */
88 TAILQ_ENTRY(umtx_q) uq_link;
89
90 /* Umtx key. */
91 struct umtx_key uq_key;
92
93 /* Umtx flags. */
94 int uq_flags;
95 #define UQF_UMTXQ 0x0001
96
97 /* The waiting thread. */
98 struct thread *uq_thread;
99
100 /*
101 * Blocked on a PI mutex. Reads may hold either the chain lock
102 * or umtx_lock; writes must hold both the chain lock and
103 * umtx_lock.
104 */
105 struct umtx_pi *uq_pi_blocked;
106
107 /* On blocked list */
108 TAILQ_ENTRY(umtx_q) uq_lockq;
109
110 /* PI mutexes owned by this thread that other threads contend for */
111 TAILQ_HEAD(,umtx_pi) uq_pi_contested;
112
113 /* Inherited priority from PP mutex */
114 u_char uq_inherited_pri;
115 };
116
117 TAILQ_HEAD(umtxq_head, umtx_q);
118
119 /* Userland lock object's wait-queue chain */
120 struct umtxq_chain {
121 /* Lock for this chain. */
122 struct mtx uc_lock;
123
124 /* List of sleep queues. */
125 struct umtxq_head uc_queue[2];
126 #define UMTX_SHARED_QUEUE 0
127 #define UMTX_EXCLUSIVE_QUEUE 1
128
129 /* Busy flag */
130 char uc_busy;
131
132 /* Chain lock waiters */
133 int uc_waiters;
134
135 /* All PI in the list */
136 TAILQ_HEAD(,umtx_pi) uc_pi_list;
137 };
138
139 #define UMTXQ_LOCKED_ASSERT(uc) mtx_assert(&(uc)->uc_lock, MA_OWNED)
140 #define UMTXQ_BUSY_ASSERT(uc) KASSERT((uc)->uc_busy != 0, ("umtx chain is not busy"))
141
142 /*
143 * Don't propagate time-sharing priority; there is a security reason:
144 * a user could simply create a PI mutex, let thread A lock it, and
145 * let another thread B block on it. Because B is sleeping, its
146 * priority would be boosted, which would boost A's priority via
147 * priority propagation too, and A's priority would never be lowered
148 * even if A were using 100% CPU. This is unfair to other processes.
149 */
150
151 #define UPRI(td) (((td)->td_user_pri >= PRI_MIN_TIMESHARE &&\
152 (td)->td_user_pri <= PRI_MAX_TIMESHARE) ?\
153 PRI_MAX_TIMESHARE : (td)->td_user_pri)
154
155 #define GOLDEN_RATIO_PRIME 2654404609U
156 #define UMTX_CHAINS 128
157 #define UMTX_SHIFTS (__WORD_BIT - 7)
158
159 #define GET_SHARE(flags) \
160 (((flags) & USYNC_PROCESS_SHARED) == 0 ? THREAD_SHARE : PROCESS_SHARE)
161
162 #define BUSY_SPINS 200
163
164 static uma_zone_t umtx_pi_zone;
165 static struct umtxq_chain umtxq_chains[2][UMTX_CHAINS];
166 static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
167 static int umtx_pi_allocated;
168
169 SYSCTL_NODE(_debug, OID_AUTO, umtx, CTLFLAG_RW, 0, "umtx debug");
170 SYSCTL_INT(_debug_umtx, OID_AUTO, umtx_pi_allocated, CTLFLAG_RD,
171 &umtx_pi_allocated, 0, "Allocated umtx_pi");
172
173 static void umtxq_sysinit(void *);
174 static void umtxq_hash(struct umtx_key *key);
175 static struct umtxq_chain *umtxq_getchain(struct umtx_key *key);
176 static void umtxq_lock(struct umtx_key *key);
177 static void umtxq_unlock(struct umtx_key *key);
178 static void umtxq_busy(struct umtx_key *key);
179 static void umtxq_unbusy(struct umtx_key *key);
180 static void umtxq_insert_queue(struct umtx_q *uq, int q);
181 static void umtxq_remove_queue(struct umtx_q *uq, int q);
182 static int umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo);
183 static int umtxq_count(struct umtx_key *key);
184 static struct umtx_pi *umtx_pi_alloc(int);
185 static void umtx_pi_free(struct umtx_pi *pi);
186 static void umtx_pi_adjust_locked(struct thread *td, u_char oldpri);
187 static int do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags);
188 static void umtx_thread_cleanup(struct thread *td);
189 static void umtx_exec_hook(void *arg __unused, struct proc *p __unused,
190 struct image_params *imgp __unused);
191 SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_sysinit, NULL);
192
193 #define umtxq_signal(key, nwake) umtxq_signal_queue((key), (nwake), UMTX_SHARED_QUEUE)
194 #define umtxq_insert(uq) umtxq_insert_queue((uq), UMTX_SHARED_QUEUE)
195 #define umtxq_remove(uq) umtxq_remove_queue((uq), UMTX_SHARED_QUEUE)
196
197 static struct mtx umtx_lock;
198
199 static void
200 umtxq_sysinit(void *arg __unused)
201 {
202 int i, j;
203
204 umtx_pi_zone = uma_zcreate("umtx pi", sizeof(struct umtx_pi),
205 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
206 for (i = 0; i < 2; ++i) {
207 for (j = 0; j < UMTX_CHAINS; ++j) {
208 mtx_init(&umtxq_chains[i][j].uc_lock, "umtxql", NULL,
209 MTX_DEF | MTX_DUPOK);
210 TAILQ_INIT(&umtxq_chains[i][j].uc_queue[0]);
211 TAILQ_INIT(&umtxq_chains[i][j].uc_queue[1]);
212 TAILQ_INIT(&umtxq_chains[i][j].uc_pi_list);
213 umtxq_chains[i][j].uc_busy = 0;
214 umtxq_chains[i][j].uc_waiters = 0;
215 }
216 }
217 mtx_init(&umtx_lock, "umtx lock", NULL, MTX_SPIN);
218 EVENTHANDLER_REGISTER(process_exec, umtx_exec_hook, NULL,
219 EVENTHANDLER_PRI_ANY);
220 }
221
222 struct umtx_q *
223 umtxq_alloc(void)
224 {
225 struct umtx_q *uq;
226
227 uq = malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK | M_ZERO);
228 TAILQ_INIT(&uq->uq_pi_contested);
229 uq->uq_inherited_pri = PRI_MAX;
230 return (uq);
231 }
232
233 void
234 umtxq_free(struct umtx_q *uq)
235 {
236 free(uq, M_UMTX);
237 }
238
239 static inline void
240 umtxq_hash(struct umtx_key *key)
241 {
242 unsigned n = (uintptr_t)key->info.both.a + key->info.both.b;
243 key->hash = ((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS;
244 }
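/*
 * Illustrative sketch (not part of the build): the chain index above is a
 * multiplicative ("golden ratio") hash. A standalone userland rendering,
 * assuming __WORD_BIT == 32 as on FreeBSD/x86, so UMTX_SHIFTS == 25;
 * hash_key() and main() are hypothetical names used for illustration only.
 */
#if 0
#include <stdio.h>

#define GOLDEN_RATIO_PRIME      2654404609U
#define UMTX_CHAINS             128
#define UMTX_SHIFTS             (32 - 7)

static unsigned
hash_key(unsigned long a, unsigned long b)
{
        unsigned n = (unsigned)(a + b);

        /* Multiply, take the top 7 bits, and fold into the chain range. */
        return (((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS);
}

int
main(void)
{
        /* Nearby addresses still spread across different chains. */
        printf("%u %u\n", hash_key(0x1000, 0), hash_key(0x1008, 0));
        return (0);
}
#endif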
245
246 static inline struct umtxq_chain *
247 umtxq_getchain(struct umtx_key *key)
248 {
249 if (key->type <= TYPE_CV)
250 return (&umtxq_chains[1][key->hash]);
251 return (&umtxq_chains[0][key->hash]);
252 }
253
254 /*
255 * Lock a chain.
256 */
257 static inline void
258 umtxq_lock(struct umtx_key *key)
259 {
260 struct umtxq_chain *uc;
261
262 uc = umtxq_getchain(key);
263 mtx_lock(&uc->uc_lock);
264 }
265
266 /*
267 * Unlock a chain.
268 */
269 static inline void
270 umtxq_unlock(struct umtx_key *key)
271 {
272 struct umtxq_chain *uc;
273
274 uc = umtxq_getchain(key);
275 mtx_unlock(&uc->uc_lock);
276 }
277
278 /*
279 * Set the chain to the busy state when the following operation
280 * may block (so a kernel mutex cannot be used).
281 */
282 static inline void
283 umtxq_busy(struct umtx_key *key)
284 {
285 struct umtxq_chain *uc;
286
287 uc = umtxq_getchain(key);
288 mtx_assert(&uc->uc_lock, MA_OWNED);
289 if (uc->uc_busy) {
290 #ifdef SMP
291 if (smp_cpus > 1) {
292 int count = BUSY_SPINS;
293 if (count > 0) {
294 umtxq_unlock(key);
295 while (uc->uc_busy && --count > 0)
296 cpu_spinwait();
297 umtxq_lock(key);
298 }
299 }
300 #endif
301 while (uc->uc_busy) {
302 uc->uc_waiters++;
303 msleep(uc, &uc->uc_lock, 0, "umtxqb", 0);
304 uc->uc_waiters--;
305 }
306 }
307 uc->uc_busy = 1;
308 }
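/*
 * Illustrative sketch (not part of the build): the same bounded
 * spin-then-sleep idea in portable userland C11. busy_acquire() is a
 * hypothetical name, and sched_yield() is a crude stand-in for msleep().
 */
#if 0
#include <stdatomic.h>
#include <sched.h>

#define BUSY_SPINS      200

static void
busy_acquire(atomic_int *busy)
{
        int spins, expect;

        for (;;) {
                /* Spin briefly first: the holder usually releases soon. */
                for (spins = BUSY_SPINS;
                    spins > 0 && atomic_load(busy) != 0; spins--)
                        ;                       /* cpu_spinwait() analogue */
                expect = 0;
                if (atomic_compare_exchange_weak(busy, &expect, 1))
                        return;                 /* the chain is now ours */
                sched_yield();                  /* msleep() stand-in */
        }
}
#endif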
309
310 /*
311 * Unbusy a chain.
312 */
313 static inline void
314 umtxq_unbusy(struct umtx_key *key)
315 {
316 struct umtxq_chain *uc;
317
318 uc = umtxq_getchain(key);
319 mtx_assert(&uc->uc_lock, MA_OWNED);
320 KASSERT(uc->uc_busy != 0, ("not busy"));
321 uc->uc_busy = 0;
322 if (uc->uc_waiters)
323 wakeup_one(uc);
324 }
325
326 static inline void
327 umtxq_insert_queue(struct umtx_q *uq, int q)
328 {
329 struct umtxq_chain *uc;
330
331 uc = umtxq_getchain(&uq->uq_key);
332 UMTXQ_LOCKED_ASSERT(uc);
333 TAILQ_INSERT_TAIL(&uc->uc_queue[q], uq, uq_link);
334 uq->uq_flags |= UQF_UMTXQ;
335 }
336
337 static inline void
338 umtxq_remove_queue(struct umtx_q *uq, int q)
339 {
340 struct umtxq_chain *uc;
341
342 uc = umtxq_getchain(&uq->uq_key);
343 UMTXQ_LOCKED_ASSERT(uc);
344 if (uq->uq_flags & UQF_UMTXQ) {
345 TAILQ_REMOVE(&uc->uc_queue[q], uq, uq_link);
346 uq->uq_flags &= ~UQF_UMTXQ;
347 }
348 }
349
350 /*
351 * Check if there are multiple waiters
352 */
353 static int
354 umtxq_count(struct umtx_key *key)
355 {
356 struct umtxq_chain *uc;
357 struct umtx_q *uq;
358 int count = 0;
359
360 uc = umtxq_getchain(key);
361 UMTXQ_LOCKED_ASSERT(uc);
362 TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
363 if (umtx_key_match(&uq->uq_key, key)) {
364 if (++count > 1)
365 break;
366 }
367 }
368 return (count);
369 }
370
371 /*
372 * Check whether there are multiple PI waiters and return the first
373 * waiter.
374 */
375 static int
376 umtxq_count_pi(struct umtx_key *key, struct umtx_q **first)
377 {
378 struct umtxq_chain *uc;
379 struct umtx_q *uq;
380 int count = 0;
381
382 *first = NULL;
383 uc = umtxq_getchain(key);
384 UMTXQ_LOCKED_ASSERT(uc);
385 TAILQ_FOREACH(uq, &uc->uc_queue[UMTX_SHARED_QUEUE], uq_link) {
386 if (umtx_key_match(&uq->uq_key, key)) {
387 if (++count > 1)
388 break;
389 *first = uq;
390 }
391 }
392 return (count);
393 }
394
395 /*
396 * Wake up threads waiting on a userland object.
397 */
399 static int
400 umtxq_signal_queue(struct umtx_key *key, int n_wake, int q)
401 {
402 struct umtxq_chain *uc;
403 struct umtx_q *uq, *next;
404 int ret;
405
406 ret = 0;
407 uc = umtxq_getchain(key);
408 UMTXQ_LOCKED_ASSERT(uc);
409 TAILQ_FOREACH_SAFE(uq, &uc->uc_queue[q], uq_link, next) {
410 if (umtx_key_match(&uq->uq_key, key)) {
411 umtxq_remove_queue(uq, q);
412 wakeup(uq);
413 if (++ret >= n_wake)
414 break;
415 }
416 }
417 return (ret);
418 }
419
421 /*
422 * Wake up specified thread.
423 */
424 static inline void
425 umtxq_signal_thread(struct umtx_q *uq)
426 {
427 struct umtxq_chain *uc;
428
429 uc = umtxq_getchain(&uq->uq_key);
430 UMTXQ_LOCKED_ASSERT(uc);
431 umtxq_remove(uq);
432 wakeup(uq);
433 }
434
435 /*
436 * Put the thread into a sleep state; before sleeping, check whether
437 * the thread has been removed from the umtx queue.
438 */
439 static inline int
440 umtxq_sleep(struct umtx_q *uq, const char *wmesg, int timo)
441 {
442 struct umtxq_chain *uc;
443 int error;
444
445 uc = umtxq_getchain(&uq->uq_key);
446 UMTXQ_LOCKED_ASSERT(uc);
447 if (!(uq->uq_flags & UQF_UMTXQ))
448 return (0);
449 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
450 if (error == EWOULDBLOCK)
451 error = ETIMEDOUT;
452 return (error);
453 }
454
455 /*
456 * Convert userspace address into unique logical address.
457 */
458 int
459 umtx_key_get(void *addr, int type, int share, struct umtx_key *key)
460 {
461 struct thread *td = curthread;
462 vm_map_t map;
463 vm_map_entry_t entry;
464 vm_pindex_t pindex;
465 vm_prot_t prot;
466 boolean_t wired;
467
468 key->type = type;
469 if (share == THREAD_SHARE) {
470 key->shared = 0;
471 key->info.private.vs = td->td_proc->p_vmspace;
472 key->info.private.addr = (uintptr_t)addr;
473 } else {
474 MPASS(share == PROCESS_SHARE || share == AUTO_SHARE);
475 map = &td->td_proc->p_vmspace->vm_map;
476 if (vm_map_lookup(&map, (vm_offset_t)addr, VM_PROT_WRITE,
477 &entry, &key->info.shared.object, &pindex, &prot,
478 &wired) != KERN_SUCCESS) {
479 return EFAULT;
480 }
481
482 if ((share == PROCESS_SHARE) ||
483 (share == AUTO_SHARE &&
484 VM_INHERIT_SHARE == entry->inheritance)) {
485 key->shared = 1;
486 key->info.shared.offset = entry->offset + entry->start -
487 (vm_offset_t)addr;
488 vm_object_reference(key->info.shared.object);
489 } else {
490 key->shared = 0;
491 key->info.private.vs = td->td_proc->p_vmspace;
492 key->info.private.addr = (uintptr_t)addr;
493 }
494 vm_map_lookup_done(map, entry);
495 }
496
497 umtxq_hash(key);
498 return (0);
499 }
500
501 /*
502 * Release key.
503 */
504 void
505 umtx_key_release(struct umtx_key *key)
506 {
507 if (key->shared)
508 vm_object_deallocate(key->info.shared.object);
509 }
510
511 /*
512 * Lock a umtx object.
513 */
514 static int
515 _do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id, int timo)
516 {
517 struct umtx_q *uq;
518 u_long owner;
519 u_long old;
520 int error = 0;
521
522 uq = td->td_umtxq;
523
524 /*
525 * Care must be exercised when dealing with the umtx structure. It
526 * can fault on any access.
527 */
528 for (;;) {
529 /*
530 * Try the uncontested case. This should be done in userland.
531 */
532 owner = casuword(&umtx->u_owner, UMTX_UNOWNED, id);
533
534 /* The acquire succeeded. */
535 if (owner == UMTX_UNOWNED)
536 return (0);
537
538 /* The address was invalid. */
539 if (owner == -1)
540 return (EFAULT);
541
542 /* If no one owns it but it is contested, try to acquire it. */
543 if (owner == UMTX_CONTESTED) {
544 owner = casuword(&umtx->u_owner,
545 UMTX_CONTESTED, id | UMTX_CONTESTED);
546
547 if (owner == UMTX_CONTESTED)
548 return (0);
549
550 /* The address was invalid. */
551 if (owner == -1)
552 return (EFAULT);
553
554 /* If this failed the lock has changed, restart. */
555 continue;
556 }
557
558 /*
559 * If we caught a signal, we have already retried, so now we
560 * exit immediately.
561 */
562 if (error != 0)
563 return (error);
564
565 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK,
566 AUTO_SHARE, &uq->uq_key)) != 0)
567 return (error);
568
569 umtxq_lock(&uq->uq_key);
570 umtxq_busy(&uq->uq_key);
571 umtxq_insert(uq);
572 umtxq_unbusy(&uq->uq_key);
573 umtxq_unlock(&uq->uq_key);
574
575 /*
576 * Set the contested bit so that a release in user space
577 * knows to use the system call for unlock. If this fails
578 * either someone else has acquired the lock or it has been
579 * released.
580 */
581 old = casuword(&umtx->u_owner, owner, owner | UMTX_CONTESTED);
582
583 /* The address was invalid. */
584 if (old == -1) {
585 umtxq_lock(&uq->uq_key);
586 umtxq_remove(uq);
587 umtxq_unlock(&uq->uq_key);
588 umtx_key_release(&uq->uq_key);
589 return (EFAULT);
590 }
591
592 /*
593 * If we set the contested bit, sleep. Otherwise the lock changed,
594 * and we either need to retry or we lost a race to the thread
595 * unlocking the umtx.
596 */
597 umtxq_lock(&uq->uq_key);
598 if (old == owner)
599 error = umtxq_sleep(uq, "umtx", timo);
600 umtxq_remove(uq);
601 umtxq_unlock(&uq->uq_key);
602 umtx_key_release(&uq->uq_key);
603 }
604
605 return (0);
606 }
607
608 /*
609 * Lock a umtx object.
610 */
611 static int
612 do_lock_umtx(struct thread *td, struct umtx *umtx, u_long id,
613 struct timespec *timeout)
614 {
615 struct timespec ts, ts2, ts3;
616 struct timeval tv;
617 int error;
618
619 if (timeout == NULL) {
620 error = _do_lock_umtx(td, umtx, id, 0);
621 /* Mutex locking is restarted if it is interrupted. */
622 if (error == EINTR)
623 error = ERESTART;
624 } else {
625 getnanouptime(&ts);
626 timespecadd(&ts, timeout);
627 TIMESPEC_TO_TIMEVAL(&tv, timeout);
628 for (;;) {
629 error = _do_lock_umtx(td, umtx, id, tvtohz(&tv));
630 if (error != ETIMEDOUT)
631 break;
632 getnanouptime(&ts2);
633 if (timespeccmp(&ts2, &ts, >=)) {
634 error = ETIMEDOUT;
635 break;
636 }
637 ts3 = ts;
638 timespecsub(&ts3, &ts2);
639 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
640 }
641 /* Timed-locking is not restarted. */
642 if (error == ERESTART)
643 error = EINTR;
644 }
645 return (error);
646 }
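/*
 * Illustrative sketch (not part of the build): the retry loop above in
 * miniature. A relative timeout becomes an absolute deadline once, and
 * the remaining time is recomputed after every premature wakeup.
 * CLOCK_MONOTONIC plays the role of getnanouptime(); sleep_op_timed and
 * sleep_with_deadline are hypothetical names.
 */
#if 0
#include <errno.h>
#include <time.h>

static int
sleep_with_deadline(int (*sleep_op_timed)(const struct timespec *),
    const struct timespec *timeout)
{
        struct timespec deadline, now, left;
        int error;

        clock_gettime(CLOCK_MONOTONIC, &deadline);
        deadline.tv_sec += timeout->tv_sec;
        deadline.tv_nsec += timeout->tv_nsec;
        if (deadline.tv_nsec >= 1000000000L) {
                deadline.tv_sec++;
                deadline.tv_nsec -= 1000000000L;
        }
        left = *timeout;
        for (;;) {
                error = sleep_op_timed(&left);  /* may time out early */
                if (error != ETIMEDOUT)
                        return (error);
                clock_gettime(CLOCK_MONOTONIC, &now);
                if (now.tv_sec > deadline.tv_sec ||
                    (now.tv_sec == deadline.tv_sec &&
                    now.tv_nsec >= deadline.tv_nsec))
                        return (ETIMEDOUT);     /* deadline truly passed */
                /* Recompute the remaining time, normalizing tv_nsec. */
                left.tv_sec = deadline.tv_sec - now.tv_sec;
                left.tv_nsec = deadline.tv_nsec - now.tv_nsec;
                if (left.tv_nsec < 0) {
                        left.tv_sec--;
                        left.tv_nsec += 1000000000L;
                }
        }
}
#endif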
647
648 /*
649 * Unlock a umtx object.
650 */
651 static int
652 do_unlock_umtx(struct thread *td, struct umtx *umtx, u_long id)
653 {
654 struct umtx_key key;
655 u_long owner;
656 u_long old;
657 int error;
658 int count;
659
660 /*
661 * Make sure we own this mtx.
662 */
663 owner = fuword(__DEVOLATILE(u_long *, &umtx->u_owner));
664 if (owner == -1)
665 return (EFAULT);
666
667 if ((owner & ~UMTX_CONTESTED) != id)
668 return (EPERM);
669
670 /* This should be done in userland */
671 if ((owner & UMTX_CONTESTED) == 0) {
672 old = casuword(&umtx->u_owner, owner, UMTX_UNOWNED);
673 if (old == -1)
674 return (EFAULT);
675 if (old == owner)
676 return (0);
677 owner = old;
678 }
679
680 /* We should only ever be in here for contested locks */
681 if ((error = umtx_key_get(umtx, TYPE_SIMPLE_LOCK, AUTO_SHARE,
682 &key)) != 0)
683 return (error);
684
685 umtxq_lock(&key);
686 umtxq_busy(&key);
687 count = umtxq_count(&key);
688 umtxq_unlock(&key);
689
690 /*
691 * When unlocking the umtx, it must be marked as unowned if
692 * zero or one thread is waiting for it.
693 * Otherwise, it must be marked as contested.
694 */
695 old = casuword(&umtx->u_owner, owner,
696 count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
697 umtxq_lock(&key);
698 umtxq_signal(&key, 1);
699 umtxq_unbusy(&key);
700 umtxq_unlock(&key);
701 umtx_key_release(&key);
702 if (old == -1)
703 return (EFAULT);
704 if (old != owner)
705 return (EINVAL);
706 return (0);
707 }
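/*
 * Illustrative sketch (not part of the build): the userland fast path
 * that the "should be done in userland" comments above assume, with
 * _umtx_op(2) as the contested fallback. Assumes <sys/umtx.h> exposes
 * UMTX_OP_LOCK and UMTX_OP_UNLOCK; umtx_lock_fast()/umtx_unlock_fast()
 * are hypothetical names and error handling is elided.
 */
#if 0
#include <sys/types.h>
#include <sys/umtx.h>
#include <machine/atomic.h>

static void
umtx_lock_fast(struct umtx *mtx, u_long id)
{
        /* Uncontested acquire: UMTX_UNOWNED -> id, no syscall needed. */
        if (atomic_cmpset_acq_long(&mtx->u_owner, UMTX_UNOWNED, id))
                return;
        /* Contested: enter the kernel, i.e. _do_lock_umtx() above. */
        (void)_umtx_op(mtx, UMTX_OP_LOCK, id, NULL, NULL);
}

static void
umtx_unlock_fast(struct umtx *mtx, u_long id)
{
        /* Uncontested release: id -> UMTX_UNOWNED, no syscall needed. */
        if (atomic_cmpset_rel_long(&mtx->u_owner, id, UMTX_UNOWNED))
                return;
        /* The contested bit is set: the kernel must wake a waiter. */
        (void)_umtx_op(mtx, UMTX_OP_UNLOCK, id, NULL, NULL);
}
#endif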
708
709 #ifdef COMPAT_IA32
710
711 /*
712 * Lock a umtx object.
713 */
714 static int
715 _do_lock_umtx32(struct thread *td, uint32_t *m, uint32_t id, int timo)
716 {
717 struct umtx_q *uq;
718 uint32_t owner;
719 uint32_t old;
720 int error = 0;
721
722 uq = td->td_umtxq;
723
724 /*
725 * Care must be exercised when dealing with the umtx structure. It
726 * can fault on any access.
727 */
728 for (;;) {
729 /*
730 * Try the uncontested case. This should be done in userland.
731 */
732 owner = casuword32(m, UMUTEX_UNOWNED, id);
733
734 /* The acquire succeeded. */
735 if (owner == UMUTEX_UNOWNED)
736 return (0);
737
738 /* The address was invalid. */
739 if (owner == -1)
740 return (EFAULT);
741
742 /* If no one owns it but it is contested, try to acquire it. */
743 if (owner == UMUTEX_CONTESTED) {
744 owner = casuword32(m,
745 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
746 if (owner == UMUTEX_CONTESTED)
747 return (0);
748
749 /* The address was invalid. */
750 if (owner == -1)
751 return (EFAULT);
752
753 /* If this failed the lock has changed, restart. */
754 continue;
755 }
756
757 /*
758 * If we caught a signal, we have already retried, so now we
759 * exit immediately.
760 */
761 if (error != 0)
762 return (error);
763
764 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK,
765 AUTO_SHARE, &uq->uq_key)) != 0)
766 return (error);
767
768 umtxq_lock(&uq->uq_key);
769 umtxq_busy(&uq->uq_key);
770 umtxq_insert(uq);
771 umtxq_unbusy(&uq->uq_key);
772 umtxq_unlock(&uq->uq_key);
773
774 /*
775 * Set the contested bit so that a release in user space
776 * knows to use the system call for unlock. If this fails
777 * either someone else has acquired the lock or it has been
778 * released.
779 */
780 old = casuword32(m, owner, owner | UMUTEX_CONTESTED);
781
782 /* The address was invalid. */
783 if (old == -1) {
784 umtxq_lock(&uq->uq_key);
785 umtxq_remove(uq);
786 umtxq_unlock(&uq->uq_key);
787 umtx_key_release(&uq->uq_key);
788 return (EFAULT);
789 }
790
791 /*
792 * If we set the contested bit, sleep. Otherwise the lock changed,
793 * and we either need to retry or we lost a race to the thread
794 * unlocking the umtx.
795 */
796 umtxq_lock(&uq->uq_key);
797 if (old == owner)
798 error = umtxq_sleep(uq, "umtx", timo);
799 umtxq_remove(uq);
800 umtxq_unlock(&uq->uq_key);
801 umtx_key_release(&uq->uq_key);
802 }
803
804 return (0);
805 }
806
807 /*
808 * Lock a umtx object.
809 */
810 static int
811 do_lock_umtx32(struct thread *td, void *m, uint32_t id,
812 struct timespec *timeout)
813 {
814 struct timespec ts, ts2, ts3;
815 struct timeval tv;
816 int error;
817
818 if (timeout == NULL) {
819 error = _do_lock_umtx32(td, m, id, 0);
820 /* Mutex locking is restarted if it is interrupted. */
821 if (error == EINTR)
822 error = ERESTART;
823 } else {
824 getnanouptime(&ts);
825 timespecadd(&ts, timeout);
826 TIMESPEC_TO_TIMEVAL(&tv, timeout);
827 for (;;) {
828 error = _do_lock_umtx32(td, m, id, tvtohz(&tv));
829 if (error != ETIMEDOUT)
830 break;
831 getnanouptime(&ts2);
832 if (timespeccmp(&ts2, &ts, >=)) {
833 error = ETIMEDOUT;
834 break;
835 }
836 ts3 = ts;
837 timespecsub(&ts3, &ts2);
838 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
839 }
840 /* Timed-locking is not restarted. */
841 if (error == ERESTART)
842 error = EINTR;
843 }
844 return (error);
845 }
846
847 /*
848 * Unlock a umtx object.
849 */
850 static int
851 do_unlock_umtx32(struct thread *td, uint32_t *m, uint32_t id)
852 {
853 struct umtx_key key;
854 uint32_t owner;
855 uint32_t old;
856 int error;
857 int count;
858
859 /*
860 * Make sure we own this mtx.
861 */
862 owner = fuword32(m);
863 if (owner == -1)
864 return (EFAULT);
865
866 if ((owner & ~UMUTEX_CONTESTED) != id)
867 return (EPERM);
868
869 /* This should be done in userland */
870 if ((owner & UMUTEX_CONTESTED) == 0) {
871 old = casuword32(m, owner, UMUTEX_UNOWNED);
872 if (old == -1)
873 return (EFAULT);
874 if (old == owner)
875 return (0);
876 owner = old;
877 }
878
879 /* We should only ever be in here for contested locks */
880 if ((error = umtx_key_get(m, TYPE_SIMPLE_LOCK, AUTO_SHARE,
881 &key)) != 0)
882 return (error);
883
884 umtxq_lock(&key);
885 umtxq_busy(&key);
886 count = umtxq_count(&key);
887 umtxq_unlock(&key);
888
889 /*
890 * When unlocking the umtx, it must be marked as unowned if
891 * zero or one thread is waiting for it.
892 * Otherwise, it must be marked as contested.
893 */
894 old = casuword32(m, owner,
895 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
896 umtxq_lock(&key);
897 umtxq_signal(&key, 1);
898 umtxq_unbusy(&key);
899 umtxq_unlock(&key);
900 umtx_key_release(&key);
901 if (old == -1)
902 return (EFAULT);
903 if (old != owner)
904 return (EINVAL);
905 return (0);
906 }
907 #endif
908
909 /*
910 * Fetch and compare value, sleep on the address if value is not changed.
911 */
912 static int
913 do_wait(struct thread *td, void *addr, u_long id,
914 struct timespec *timeout, int compat32, int is_private)
915 {
916 struct umtx_q *uq;
917 struct timespec ts, ts2, ts3;
918 struct timeval tv;
919 u_long tmp;
920 int error = 0;
921
922 uq = td->td_umtxq;
923 if ((error = umtx_key_get(addr, TYPE_SIMPLE_WAIT,
924 is_private ? THREAD_SHARE : AUTO_SHARE, &uq->uq_key)) != 0)
925 return (error);
926
927 umtxq_lock(&uq->uq_key);
928 umtxq_insert(uq);
929 umtxq_unlock(&uq->uq_key);
930 if (compat32 == 0)
931 tmp = fuword(addr);
932 else
933 tmp = (unsigned int)fuword32(addr);
934 if (tmp != id) {
935 umtxq_lock(&uq->uq_key);
936 umtxq_remove(uq);
937 umtxq_unlock(&uq->uq_key);
938 } else if (timeout == NULL) {
939 umtxq_lock(&uq->uq_key);
940 error = umtxq_sleep(uq, "uwait", 0);
941 umtxq_remove(uq);
942 umtxq_unlock(&uq->uq_key);
943 } else {
944 getnanouptime(&ts);
945 timespecadd(&ts, timeout);
946 TIMESPEC_TO_TIMEVAL(&tv, timeout);
947 umtxq_lock(&uq->uq_key);
948 for (;;) {
949 error = umtxq_sleep(uq, "uwait", tvtohz(&tv));
950 if (!(uq->uq_flags & UQF_UMTXQ))
951 break;
952 if (error != ETIMEDOUT)
953 break;
954 umtxq_unlock(&uq->uq_key);
955 getnanouptime(&ts2);
956 if (timespeccmp(&ts2, &ts, >=)) {
957 error = ETIMEDOUT;
958 umtxq_lock(&uq->uq_key);
959 break;
960 }
961 ts3 = ts;
962 timespecsub(&ts3, &ts2);
963 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
964 umtxq_lock(&uq->uq_key);
965 }
966 umtxq_remove(uq);
967 umtxq_unlock(&uq->uq_key);
968 }
969 umtx_key_release(&uq->uq_key);
970 if (error == ERESTART)
971 error = EINTR;
972 return (error);
973 }
974
975 /*
976 * Wake up threads sleeping on the specified address.
977 */
978 int
979 kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
980 {
981 struct umtx_key key;
982 int ret;
983
984 if ((ret = umtx_key_get(uaddr, TYPE_SIMPLE_WAIT,
985 is_private ? THREAD_SHARE : AUTO_SHARE, &key)) != 0)
986 return (ret);
987 umtxq_lock(&key);
988 ret = umtxq_signal(&key, n_wake);
989 umtxq_unlock(&key);
990 umtx_key_release(&key);
991 return (0);
992 }
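/*
 * Illustrative sketch (not part of the build): how userland reaches
 * do_wait() and kern_umtx_wake() via _umtx_op(2). UMTX_OP_WAIT sleeps
 * only while the word still holds the given value; UMTX_OP_WAKE wakes up
 * to 'val' sleepers. gate_wait()/gate_open() are hypothetical names.
 */
#if 0
#include <sys/types.h>
#include <sys/umtx.h>
#include <limits.h>

static volatile u_long gate;

static void
gate_wait(void)
{
        /* Loop: spurious wakeups are allowed, so re-check the value. */
        while (gate == 0)
                (void)_umtx_op(__DEVOLATILE(void *, &gate), UMTX_OP_WAIT,
                    0, NULL, NULL);
}

static void
gate_open(void)
{
        gate = 1;
        /* Wake every thread currently sleeping on &gate. */
        (void)_umtx_op(__DEVOLATILE(void *, &gate), UMTX_OP_WAKE,
            INT_MAX, NULL, NULL);
}
#endif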
993
994 /*
995 * Lock PTHREAD_PRIO_NONE protocol POSIX mutex.
996 */
997 static int
998 _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
999 int mode)
1000 {
1001 struct umtx_q *uq;
1002 uint32_t owner, old, id;
1003 int error = 0;
1004
1005 id = td->td_tid;
1006 uq = td->td_umtxq;
1007
1008 /*
1009 * Care must be exercised when dealing with the umtx structure. It
1010 * can fault on any access.
1011 */
1012 for (;;) {
1013 owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
1014 if (mode == _UMUTEX_WAIT) {
1015 if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
1016 return (0);
1017 } else {
1018 /*
1019 * Try the uncontested case. This should be done in userland.
1020 */
1021 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1022
1023 /* The acquire succeeded. */
1024 if (owner == UMUTEX_UNOWNED)
1025 return (0);
1026
1027 /* The address was invalid. */
1028 if (owner == -1)
1029 return (EFAULT);
1030
1031 /* If no one owns it but it is contested, try to acquire it. */
1032 if (owner == UMUTEX_CONTESTED) {
1033 owner = casuword32(&m->m_owner,
1034 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1035
1036 if (owner == UMUTEX_CONTESTED)
1037 return (0);
1038
1039 /* The address was invalid. */
1040 if (owner == -1)
1041 return (EFAULT);
1042
1043 /* If this failed the lock has changed, restart. */
1044 continue;
1045 }
1046 }
1047
1048 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1049 (owner & ~UMUTEX_CONTESTED) == id)
1050 return (EDEADLK);
1051
1052 if (mode == _UMUTEX_TRY)
1053 return (EBUSY);
1054
1055 /*
1056 * If we caught a signal, we have already retried, so now we
1057 * exit immediately.
1058 */
1059 if (error != 0)
1060 return (error);
1061
1062 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX,
1063 GET_SHARE(flags), &uq->uq_key)) != 0)
1064 return (error);
1065
1066 umtxq_lock(&uq->uq_key);
1067 umtxq_busy(&uq->uq_key);
1068 umtxq_insert(uq);
1069 umtxq_unlock(&uq->uq_key);
1070
1071 /*
1072 * Set the contested bit so that a release in user space
1073 * knows to use the system call for unlock. If this fails
1074 * either someone else has acquired the lock or it has been
1075 * released.
1076 */
1077 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1078
1079 /* The address was invalid. */
1080 if (old == -1) {
1081 umtxq_lock(&uq->uq_key);
1082 umtxq_remove(uq);
1083 umtxq_unbusy(&uq->uq_key);
1084 umtxq_unlock(&uq->uq_key);
1085 umtx_key_release(&uq->uq_key);
1086 return (EFAULT);
1087 }
1088
1089 /*
1090 * If we set the contested bit, sleep. Otherwise the lock changed,
1091 * and we either need to retry or we lost a race to the thread
1092 * unlocking the umtx.
1093 */
1094 umtxq_lock(&uq->uq_key);
1095 umtxq_unbusy(&uq->uq_key);
1096 if (old == owner)
1097 error = umtxq_sleep(uq, "umtxn", timo);
1098 umtxq_remove(uq);
1099 umtxq_unlock(&uq->uq_key);
1100 umtx_key_release(&uq->uq_key);
1101 }
1102
1103 return (0);
1104 }
1105
1109 /*
1110 * Unlock PTHREAD_PRIO_NONE protocol POSIX mutex.
1111 */
1112 static int
1113 do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
1114 {
1115 struct umtx_key key;
1116 uint32_t owner, old, id;
1117 int error;
1118 int count;
1119
1120 id = td->td_tid;
1121 /*
1122 * Make sure we own this mtx.
1123 */
1124 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1125 if (owner == -1)
1126 return (EFAULT);
1127
1128 if ((owner & ~UMUTEX_CONTESTED) != id)
1129 return (EPERM);
1130
1131 if ((owner & UMUTEX_CONTESTED) == 0) {
1132 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1133 if (old == -1)
1134 return (EFAULT);
1135 if (old == owner)
1136 return (0);
1137 owner = old;
1138 }
1139
1140 /* We should only ever be in here for contested locks */
1141 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1142 &key)) != 0)
1143 return (error);
1144
1145 umtxq_lock(&key);
1146 umtxq_busy(&key);
1147 count = umtxq_count(&key);
1148 umtxq_unlock(&key);
1149
1150 /*
1151 * When unlocking the umtx, it must be marked as unowned if
1152 * zero or one thread is waiting for it.
1153 * Otherwise, it must be marked as contested.
1154 */
1155 old = casuword32(&m->m_owner, owner,
1156 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1157 umtxq_lock(&key);
1158 umtxq_signal(&key, 1);
1159 umtxq_unbusy(&key);
1160 umtxq_unlock(&key);
1161 umtx_key_release(&key);
1162 if (old == -1)
1163 return (EFAULT);
1164 if (old != owner)
1165 return (EINVAL);
1166 return (0);
1167 }
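/*
 * Illustrative sketch (not part of the build): the PTHREAD_PRIO_NONE
 * fast/slow path split that _do_lock_normal()/do_unlock_normal()
 * implement. The owner field holds the thread id from thr_self(2);
 * UMTX_OP_MUTEX_LOCK and UMTX_OP_MUTEX_UNLOCK come from <sys/umtx.h>.
 * umutex_lock()/umutex_unlock() are hypothetical names and error
 * handling is elided.
 */
#if 0
#include <sys/types.h>
#include <sys/umtx.h>
#include <sys/thr.h>
#include <machine/atomic.h>

static void
umutex_lock(struct umutex *m)
{
        long id;

        thr_self(&id);
        /* Uncontested: UMUTEX_UNOWNED -> tid without a syscall. */
        if (atomic_cmpset_acq_32((volatile uint32_t *)&m->m_owner,
            UMUTEX_UNOWNED, (uint32_t)id))
                return;
        /* Contested: the kernel sets UMUTEX_CONTESTED and sleeps. */
        (void)_umtx_op(m, UMTX_OP_MUTEX_LOCK, 0, NULL, NULL);
}

static void
umutex_unlock(struct umutex *m)
{
        long id;

        thr_self(&id);
        /* Uncontested: tid -> UMUTEX_UNOWNED without a syscall. */
        if (atomic_cmpset_rel_32((volatile uint32_t *)&m->m_owner,
            (uint32_t)id, UMUTEX_UNOWNED))
                return;
        /* UMUTEX_CONTESTED is set: the kernel must wake a waiter. */
        (void)_umtx_op(m, UMTX_OP_MUTEX_UNLOCK, 0, NULL, NULL);
}
#endif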
1168
1169 /*
1170 * Check whether the mutex is available and wake up a waiter;
1171 * this applies only to simple mutexes.
1172 */
1173 static int
1174 do_wake_umutex(struct thread *td, struct umutex *m)
1175 {
1176 struct umtx_key key;
1177 uint32_t owner;
1178 uint32_t flags;
1179 int error;
1180 int count;
1181
1182 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1183 if (owner == -1)
1184 return (EFAULT);
1185
1186 if ((owner & ~UMUTEX_CONTESTED) != 0)
1187 return (0);
1188
1189 flags = fuword32(&m->m_flags);
1190
1191 /* We should only ever be in here for contested locks */
1192 if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
1193 &key)) != 0)
1194 return (error);
1195
1196 umtxq_lock(&key);
1197 umtxq_busy(&key);
1198 count = umtxq_count(&key);
1199 umtxq_unlock(&key);
1200
1201 if (count <= 1)
1202 owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
1203
1204 umtxq_lock(&key);
1205 if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
1206 umtxq_signal(&key, 1);
1207 umtxq_unbusy(&key);
1208 umtxq_unlock(&key);
1209 umtx_key_release(&key);
1210 return (0);
1211 }
1212
1213 static inline struct umtx_pi *
1214 umtx_pi_alloc(int flags)
1215 {
1216 struct umtx_pi *pi;
1217
1218 pi = uma_zalloc(umtx_pi_zone, M_ZERO | flags);
1219 TAILQ_INIT(&pi->pi_blocked);
1220 atomic_add_int(&umtx_pi_allocated, 1);
1221 return (pi);
1222 }
1223
1224 static inline void
1225 umtx_pi_free(struct umtx_pi *pi)
1226 {
1227 uma_zfree(umtx_pi_zone, pi);
1228 atomic_add_int(&umtx_pi_allocated, -1);
1229 }
1230
1231 /*
1232 * Adjust the thread's position on a PI mutex's blocked list after
1233 * its priority has been changed.
1234 */
1235 static int
1236 umtx_pi_adjust_thread(struct umtx_pi *pi, struct thread *td)
1237 {
1238 struct umtx_q *uq, *uq1, *uq2;
1239 struct thread *td1;
1240
1241 mtx_assert(&umtx_lock, MA_OWNED);
1242 if (pi == NULL)
1243 return (0);
1244
1245 uq = td->td_umtxq;
1246
1247 /*
1248 * Check if the thread needs to be moved on the blocked chain.
1249 * It needs to be moved if its priority value is lower than the
1250 * previous thread's or higher than the next's (lower value = higher priority).
1251 */
1252 uq1 = TAILQ_PREV(uq, umtxq_head, uq_lockq);
1253 uq2 = TAILQ_NEXT(uq, uq_lockq);
1254 if ((uq1 != NULL && UPRI(td) < UPRI(uq1->uq_thread)) ||
1255 (uq2 != NULL && UPRI(td) > UPRI(uq2->uq_thread))) {
1256 /*
1257 * Remove thread from blocked chain and determine where
1258 * it should be moved to.
1259 */
1260 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1261 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1262 td1 = uq1->uq_thread;
1263 MPASS(td1->td_proc->p_magic == P_MAGIC);
1264 if (UPRI(td1) > UPRI(td))
1265 break;
1266 }
1267
1268 if (uq1 == NULL)
1269 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1270 else
1271 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1272 }
1273 return (1);
1274 }
1275
1276 /*
1277 * Propagate priority when a thread is blocked on POSIX
1278 * PI mutex.
1279 */
1280 static void
1281 umtx_propagate_priority(struct thread *td)
1282 {
1283 struct umtx_q *uq;
1284 struct umtx_pi *pi;
1285 int pri;
1286
1287 mtx_assert(&umtx_lock, MA_OWNED);
1288 pri = UPRI(td);
1289 uq = td->td_umtxq;
1290 pi = uq->uq_pi_blocked;
1291 if (pi == NULL)
1292 return;
1293
1294 for (;;) {
1295 td = pi->pi_owner;
1296 if (td == NULL)
1297 return;
1298
1299 MPASS(td->td_proc != NULL);
1300 MPASS(td->td_proc->p_magic == P_MAGIC);
1301
1302 if (UPRI(td) <= pri)
1303 return;
1304
1305 thread_lock(td);
1306 sched_lend_user_prio(td, pri);
1307 thread_unlock(td);
1308
1309 /*
1310 * Pick up the lock that td is blocked on.
1311 */
1312 uq = td->td_umtxq;
1313 pi = uq->uq_pi_blocked;
1314 /* Resort td on the list if needed. */
1315 if (!umtx_pi_adjust_thread(pi, td))
1316 break;
1317 }
1318 }
1319
1320 /*
1321 * Unpropagate priority for a PI mutex when a thread blocked on
1322 * it is interrupted by a signal or resumed by others.
1323 */
1324 static void
1325 umtx_unpropagate_priority(struct umtx_pi *pi)
1326 {
1327 struct umtx_q *uq, *uq_owner;
1328 struct umtx_pi *pi2;
1329 int pri, oldpri;
1330
1331 mtx_assert(&umtx_lock, MA_OWNED);
1332
1333 while (pi != NULL && pi->pi_owner != NULL) {
1334 pri = PRI_MAX;
1335 uq_owner = pi->pi_owner->td_umtxq;
1336
1337 TAILQ_FOREACH(pi2, &uq_owner->uq_pi_contested, pi_link) {
1338 uq = TAILQ_FIRST(&pi2->pi_blocked);
1339 if (uq != NULL) {
1340 if (pri > UPRI(uq->uq_thread))
1341 pri = UPRI(uq->uq_thread);
1342 }
1343 }
1344
1345 if (pri > uq_owner->uq_inherited_pri)
1346 pri = uq_owner->uq_inherited_pri;
1347 thread_lock(pi->pi_owner);
1348 oldpri = pi->pi_owner->td_user_pri;
1349 sched_unlend_user_prio(pi->pi_owner, pri);
1350 thread_unlock(pi->pi_owner);
1351 if (uq_owner->uq_pi_blocked != NULL)
1352 umtx_pi_adjust_locked(pi->pi_owner, oldpri);
1353 pi = uq_owner->uq_pi_blocked;
1354 }
1355 }
1356
1357 /*
1358 * Insert a PI mutex into owned list.
1359 */
1360 static void
1361 umtx_pi_setowner(struct umtx_pi *pi, struct thread *owner)
1362 {
1363 struct umtx_q *uq_owner;
1364
1365 uq_owner = owner->td_umtxq;
1366 mtx_assert(&umtx_lock, MA_OWNED);
1367 if (pi->pi_owner != NULL)
1368 panic("pi_ower != NULL");
1369 pi->pi_owner = owner;
1370 TAILQ_INSERT_TAIL(&uq_owner->uq_pi_contested, pi, pi_link);
1371 }
1372
1373 /*
1374 * Claim ownership of a PI mutex.
1375 */
1376 static int
1377 umtx_pi_claim(struct umtx_pi *pi, struct thread *owner)
1378 {
1379 struct umtx_q *uq, *uq_owner;
1380
1381 uq_owner = owner->td_umtxq;
1382 mtx_lock_spin(&umtx_lock);
1383 if (pi->pi_owner == owner) {
1384 mtx_unlock_spin(&umtx_lock);
1385 return (0);
1386 }
1387
1388 if (pi->pi_owner != NULL) {
1389 /*
1390 * userland may have already messed with the mutex, sigh.
1391 */
1392 mtx_unlock_spin(&umtx_lock);
1393 return (EPERM);
1394 }
1395 umtx_pi_setowner(pi, owner);
1396 uq = TAILQ_FIRST(&pi->pi_blocked);
1397 if (uq != NULL) {
1398 int pri;
1399
1400 pri = UPRI(uq->uq_thread);
1401 thread_lock(owner);
1402 if (pri < UPRI(owner))
1403 sched_lend_user_prio(owner, pri);
1404 thread_unlock(owner);
1405 }
1406 mtx_unlock_spin(&umtx_lock);
1407 return (0);
1408 }
1409
1410 static void
1411 umtx_pi_adjust_locked(struct thread *td, u_char oldpri)
1412 {
1413 struct umtx_q *uq;
1414 struct umtx_pi *pi;
1415
1416 uq = td->td_umtxq;
1417 /*
1418 * Pick up the lock that td is blocked on.
1419 */
1420 pi = uq->uq_pi_blocked;
1421 MPASS(pi != NULL);
1422
1423 /* Resort the turnstile on the list. */
1424 if (!umtx_pi_adjust_thread(pi, td))
1425 return;
1426
1427 /*
1428 * If our priority was lowered and we are at the head of the
1429 * turnstile, then propagate our new priority up the chain.
1430 */
1431 if (uq == TAILQ_FIRST(&pi->pi_blocked) && UPRI(td) < oldpri)
1432 umtx_propagate_priority(td);
1433 }
1434
1435 /*
1436 * Adjust a thread's position in the queue of the PI mutex it is
1437 * blocked on; this may trigger a new round of priority propagation.
1438 */
1439 void
1440 umtx_pi_adjust(struct thread *td, u_char oldpri)
1441 {
1442 struct umtx_q *uq;
1443 struct umtx_pi *pi;
1444
1445 uq = td->td_umtxq;
1446 mtx_lock_spin(&umtx_lock);
1447 /*
1448 * Pick up the lock that td is blocked on.
1449 */
1450 pi = uq->uq_pi_blocked;
1451 if (pi != NULL)
1452 umtx_pi_adjust_locked(td, oldpri);
1453 mtx_unlock_spin(&umtx_lock);
1454 }
1455
1456 /*
1457 * Sleep on a PI mutex.
1458 */
1459 static int
1460 umtxq_sleep_pi(struct umtx_q *uq, struct umtx_pi *pi,
1461 uint32_t owner, const char *wmesg, int timo)
1462 {
1463 struct umtxq_chain *uc;
1464 struct thread *td, *td1;
1465 struct umtx_q *uq1;
1466 int pri;
1467 int error = 0;
1468
1469 td = uq->uq_thread;
1470 KASSERT(td == curthread, ("inconsistent uq_thread"));
1471 uc = umtxq_getchain(&uq->uq_key);
1472 UMTXQ_LOCKED_ASSERT(uc);
1473 UMTXQ_BUSY_ASSERT(uc);
1474 umtxq_insert(uq);
1475 mtx_lock_spin(&umtx_lock);
1476 if (pi->pi_owner == NULL) {
1477 /* XXX
1478 * Currently we only support process-private PI mutexes;
1479 * non-contended PI mutexes are locked in userland. A
1480 * process-shared PI mutex should always be initialized and
1481 * registered by the kernel, and locking should always be done
1482 * by the kernel, to avoid security problems. For a
1483 * process-private PI mutex, we can find the owner thread and
1484 * boost its priority safely.
1485 */
1486 mtx_unlock_spin(&umtx_lock);
1487 PROC_LOCK(curproc);
1488 td1 = thread_find(curproc, owner);
1489 mtx_lock_spin(&umtx_lock);
1490 if (td1 != NULL && pi->pi_owner == NULL) {
1491 uq1 = td1->td_umtxq;
1492 umtx_pi_setowner(pi, td1);
1493 }
1494 PROC_UNLOCK(curproc);
1495 }
1496
1497 TAILQ_FOREACH(uq1, &pi->pi_blocked, uq_lockq) {
1498 pri = UPRI(uq1->uq_thread);
1499 if (pri > UPRI(td))
1500 break;
1501 }
1502
1503 if (uq1 != NULL)
1504 TAILQ_INSERT_BEFORE(uq1, uq, uq_lockq);
1505 else
1506 TAILQ_INSERT_TAIL(&pi->pi_blocked, uq, uq_lockq);
1507
1508 uq->uq_pi_blocked = pi;
1509 thread_lock(td);
1510 td->td_flags |= TDF_UPIBLOCKED;
1511 thread_unlock(td);
1512 umtx_propagate_priority(td);
1513 mtx_unlock_spin(&umtx_lock);
1514 umtxq_unbusy(&uq->uq_key);
1515
1516 if (uq->uq_flags & UQF_UMTXQ) {
1517 error = msleep(uq, &uc->uc_lock, PCATCH, wmesg, timo);
1518 if (error == EWOULDBLOCK)
1519 error = ETIMEDOUT;
1520 if (uq->uq_flags & UQF_UMTXQ) {
1521 umtxq_remove(uq);
1522 }
1523 }
1524 mtx_lock_spin(&umtx_lock);
1525 uq->uq_pi_blocked = NULL;
1526 thread_lock(td);
1527 td->td_flags &= ~TDF_UPIBLOCKED;
1528 thread_unlock(td);
1529 TAILQ_REMOVE(&pi->pi_blocked, uq, uq_lockq);
1530 umtx_unpropagate_priority(pi);
1531 mtx_unlock_spin(&umtx_lock);
1532 umtxq_unlock(&uq->uq_key);
1533
1534 return (error);
1535 }
1536
1537 /*
1538 * Add reference count for a PI mutex.
1539 */
1540 static void
1541 umtx_pi_ref(struct umtx_pi *pi)
1542 {
1543 struct umtxq_chain *uc;
1544
1545 uc = umtxq_getchain(&pi->pi_key);
1546 UMTXQ_LOCKED_ASSERT(uc);
1547 pi->pi_refcount++;
1548 }
1549
1550 /*
1551 * Decrease the reference count of a PI mutex; when the count
1552 * drops to zero, its memory is freed.
1553 */
1554 static void
1555 umtx_pi_unref(struct umtx_pi *pi)
1556 {
1557 struct umtxq_chain *uc;
1558
1559 uc = umtxq_getchain(&pi->pi_key);
1560 UMTXQ_LOCKED_ASSERT(uc);
1561 KASSERT(pi->pi_refcount > 0, ("invalid reference count"));
1562 if (--pi->pi_refcount == 0) {
1563 mtx_lock_spin(&umtx_lock);
1564 if (pi->pi_owner != NULL) {
1565 TAILQ_REMOVE(&pi->pi_owner->td_umtxq->uq_pi_contested,
1566 pi, pi_link);
1567 pi->pi_owner = NULL;
1568 }
1569 KASSERT(TAILQ_EMPTY(&pi->pi_blocked),
1570 ("blocked queue not empty"));
1571 mtx_unlock_spin(&umtx_lock);
1572 TAILQ_REMOVE(&uc->uc_pi_list, pi, pi_hashlink);
1573 umtx_pi_free(pi);
1574 }
1575 }
1576
1577 /*
1578 * Find a PI mutex in hash table.
1579 */
1580 static struct umtx_pi *
1581 umtx_pi_lookup(struct umtx_key *key)
1582 {
1583 struct umtxq_chain *uc;
1584 struct umtx_pi *pi;
1585
1586 uc = umtxq_getchain(key);
1587 UMTXQ_LOCKED_ASSERT(uc);
1588
1589 TAILQ_FOREACH(pi, &uc->uc_pi_list, pi_hashlink) {
1590 if (umtx_key_match(&pi->pi_key, key)) {
1591 return (pi);
1592 }
1593 }
1594 return (NULL);
1595 }
1596
1597 /*
1598 * Insert a PI mutex into hash table.
1599 */
1600 static inline void
1601 umtx_pi_insert(struct umtx_pi *pi)
1602 {
1603 struct umtxq_chain *uc;
1604
1605 uc = umtxq_getchain(&pi->pi_key);
1606 UMTXQ_LOCKED_ASSERT(uc);
1607 TAILQ_INSERT_TAIL(&uc->uc_pi_list, pi, pi_hashlink);
1608 }
1609
1610 /*
1611 * Lock a PI mutex.
1612 */
1613 static int
1614 _do_lock_pi(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1615 int try)
1616 {
1617 struct umtx_q *uq;
1618 struct umtx_pi *pi, *new_pi;
1619 uint32_t id, owner, old;
1620 int error;
1621
1622 id = td->td_tid;
1623 uq = td->td_umtxq;
1624
1625 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1626 &uq->uq_key)) != 0)
1627 return (error);
1628 umtxq_lock(&uq->uq_key);
1629 pi = umtx_pi_lookup(&uq->uq_key);
1630 if (pi == NULL) {
1631 new_pi = umtx_pi_alloc(M_NOWAIT);
1632 if (new_pi == NULL) {
1633 umtxq_unlock(&uq->uq_key);
1634 new_pi = umtx_pi_alloc(M_WAITOK);
1635 umtxq_lock(&uq->uq_key);
1636 pi = umtx_pi_lookup(&uq->uq_key);
1637 if (pi != NULL) {
1638 umtx_pi_free(new_pi);
1639 new_pi = NULL;
1640 }
1641 }
1642 if (new_pi != NULL) {
1643 new_pi->pi_key = uq->uq_key;
1644 umtx_pi_insert(new_pi);
1645 pi = new_pi;
1646 }
1647 }
1648 umtx_pi_ref(pi);
1649 umtxq_unlock(&uq->uq_key);
1650
1651 /*
1652 * Care must be exercised when dealing with the umtx structure. It
1653 * can fault on any access.
1654 */
1655 for (;;) {
1656 /*
1657 * Try the uncontested case. This should be done in userland.
1658 */
1659 owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
1660
1661 /* The acquire succeeded. */
1662 if (owner == UMUTEX_UNOWNED) {
1663 error = 0;
1664 break;
1665 }
1666
1667 /* The address was invalid. */
1668 if (owner == -1) {
1669 error = EFAULT;
1670 break;
1671 }
1672
1673 /* If no one owns it but it is contested, try to acquire it. */
1674 if (owner == UMUTEX_CONTESTED) {
1675 owner = casuword32(&m->m_owner,
1676 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1677
1678 if (owner == UMUTEX_CONTESTED) {
1679 umtxq_lock(&uq->uq_key);
1680 umtxq_busy(&uq->uq_key);
1681 error = umtx_pi_claim(pi, td);
1682 umtxq_unbusy(&uq->uq_key);
1683 umtxq_unlock(&uq->uq_key);
1684 break;
1685 }
1686
1687 /* The address was invalid. */
1688 if (owner == -1) {
1689 error = EFAULT;
1690 break;
1691 }
1692
1693 /* If this failed the lock has changed, restart. */
1694 continue;
1695 }
1696
1697 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1698 (owner & ~UMUTEX_CONTESTED) == id) {
1699 error = EDEADLK;
1700 break;
1701 }
1702
1703 if (try != 0) {
1704 error = EBUSY;
1705 break;
1706 }
1707
1708 /*
1709 * If we caught a signal, we have already retried, so now we
1710 * exit immediately.
1711 */
1712 if (error != 0)
1713 break;
1714
1715 umtxq_lock(&uq->uq_key);
1716 umtxq_busy(&uq->uq_key);
1717 umtxq_unlock(&uq->uq_key);
1718
1719 /*
1720 * Set the contested bit so that a release in user space
1721 * knows to use the system call for unlock. If this fails
1722 * either someone else has acquired the lock or it has been
1723 * released.
1724 */
1725 old = casuword32(&m->m_owner, owner, owner | UMUTEX_CONTESTED);
1726
1727 /* The address was invalid. */
1728 if (old == -1) {
1729 umtxq_lock(&uq->uq_key);
1730 umtxq_unbusy(&uq->uq_key);
1731 umtxq_unlock(&uq->uq_key);
1732 error = EFAULT;
1733 break;
1734 }
1735
1736 umtxq_lock(&uq->uq_key);
1737 /*
1738 * If we set the contested bit, sleep. Otherwise the lock changed,
1739 * and we either need to retry or we lost a race to the thread
1740 * unlocking the umtx.
1741 */
1742 if (old == owner)
1743 error = umtxq_sleep_pi(uq, pi, owner & ~UMUTEX_CONTESTED,
1744 "umtxpi", timo);
1745 else {
1746 umtxq_unbusy(&uq->uq_key);
1747 umtxq_unlock(&uq->uq_key);
1748 }
1749 }
1750
1751 umtxq_lock(&uq->uq_key);
1752 umtx_pi_unref(pi);
1753 umtxq_unlock(&uq->uq_key);
1754
1755 umtx_key_release(&uq->uq_key);
1756 return (error);
1757 }
1758
1759 /*
1760 * Unlock a PI mutex.
1761 */
1762 static int
1763 do_unlock_pi(struct thread *td, struct umutex *m, uint32_t flags)
1764 {
1765 struct umtx_key key;
1766 struct umtx_q *uq_first, *uq_first2, *uq_me;
1767 struct umtx_pi *pi, *pi2;
1768 uint32_t owner, old, id;
1769 int error;
1770 int count;
1771 int pri;
1772
1773 id = td->td_tid;
1774 /*
1775 * Make sure we own this mtx.
1776 */
1777 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
1778 if (owner == -1)
1779 return (EFAULT);
1780
1781 if ((owner & ~UMUTEX_CONTESTED) != id)
1782 return (EPERM);
1783
1784 /* This should be done in userland */
1785 if ((owner & UMUTEX_CONTESTED) == 0) {
1786 old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
1787 if (old == -1)
1788 return (EFAULT);
1789 if (old == owner)
1790 return (0);
1791 owner = old;
1792 }
1793
1794 /* We should only ever be in here for contested locks */
1795 if ((error = umtx_key_get(m, TYPE_PI_UMUTEX, GET_SHARE(flags),
1796 &key)) != 0)
1797 return (error);
1798
1799 umtxq_lock(&key);
1800 umtxq_busy(&key);
1801 count = umtxq_count_pi(&key, &uq_first);
1802 if (uq_first != NULL) {
1803 mtx_lock_spin(&umtx_lock);
1804 pi = uq_first->uq_pi_blocked;
1805 KASSERT(pi != NULL, ("pi == NULL?"));
1806 if (pi->pi_owner != curthread) {
1807 mtx_unlock_spin(&umtx_lock);
1808 umtxq_unbusy(&key);
1809 umtxq_unlock(&key);
1810 umtx_key_release(&key);
1811 /* userland messed with the mutex */
1812 return (EPERM);
1813 }
1814 uq_me = curthread->td_umtxq;
1815 pi->pi_owner = NULL;
1816 TAILQ_REMOVE(&uq_me->uq_pi_contested, pi, pi_link);
1817 /* Get the highest-priority thread that is still sleeping. */
1818 uq_first = TAILQ_FIRST(&pi->pi_blocked);
1819 while (uq_first != NULL &&
1820 (uq_first->uq_flags & UQF_UMTXQ) == 0) {
1821 uq_first = TAILQ_NEXT(uq_first, uq_lockq);
1822 }
1823 pri = PRI_MAX;
1824 TAILQ_FOREACH(pi2, &uq_me->uq_pi_contested, pi_link) {
1825 uq_first2 = TAILQ_FIRST(&pi2->pi_blocked);
1826 if (uq_first2 != NULL) {
1827 if (pri > UPRI(uq_first2->uq_thread))
1828 pri = UPRI(uq_first2->uq_thread);
1829 }
1830 }
1831 thread_lock(curthread);
1832 sched_unlend_user_prio(curthread, pri);
1833 thread_unlock(curthread);
1834 mtx_unlock_spin(&umtx_lock);
1835 if (uq_first)
1836 umtxq_signal_thread(uq_first);
1837 }
1838 umtxq_unlock(&key);
1839
1840 /*
1841 * When unlocking the umtx, it must be marked as unowned if
1842 * zero or one thread is waiting for it.
1843 * Otherwise, it must be marked as contested.
1844 */
1845 old = casuword32(&m->m_owner, owner,
1846 count <= 1 ? UMUTEX_UNOWNED : UMUTEX_CONTESTED);
1847
1848 umtxq_lock(&key);
1849 umtxq_unbusy(&key);
1850 umtxq_unlock(&key);
1851 umtx_key_release(&key);
1852 if (old == -1)
1853 return (EFAULT);
1854 if (old != owner)
1855 return (EINVAL);
1856 return (0);
1857 }
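/*
 * Illustrative sketch (not part of the build): how a program reaches the
 * PI path above through POSIX threads. Requesting PTHREAD_PRIO_INHERIT
 * makes the threads library set UMUTEX_PRIO_INHERIT in m_flags, which
 * routes lock/unlock to _do_lock_pi()/do_unlock_pi(). Error handling is
 * elided.
 */
#if 0
#include <pthread.h>

static pthread_mutex_t pi_mtx;

static void
pi_mtx_init(void)
{
        pthread_mutexattr_t attr;

        pthread_mutexattr_init(&attr);
        /* The owner inherits the priority of its highest-priority waiter. */
        pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
        pthread_mutex_init(&pi_mtx, &attr);
        pthread_mutexattr_destroy(&attr);
}
#endif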
1858
1859 /*
1860 * Lock a PP mutex.
1861 */
1862 static int
1863 _do_lock_pp(struct thread *td, struct umutex *m, uint32_t flags, int timo,
1864 int try)
1865 {
1866 struct umtx_q *uq, *uq2;
1867 struct umtx_pi *pi;
1868 uint32_t ceiling;
1869 uint32_t owner, id;
1870 int error, pri, old_inherited_pri, su;
1871
1872 id = td->td_tid;
1873 uq = td->td_umtxq;
1874 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
1875 &uq->uq_key)) != 0)
1876 return (error);
1877 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
1878 for (;;) {
1879 old_inherited_pri = uq->uq_inherited_pri;
1880 umtxq_lock(&uq->uq_key);
1881 umtxq_busy(&uq->uq_key);
1882 umtxq_unlock(&uq->uq_key);
1883
1884 ceiling = RTP_PRIO_MAX - fuword32(&m->m_ceilings[0]);
1885 if (ceiling > RTP_PRIO_MAX) {
1886 error = EINVAL;
1887 goto out;
1888 }
1889
1890 mtx_lock_spin(&umtx_lock);
1891 if (UPRI(td) < PRI_MIN_REALTIME + ceiling) {
1892 mtx_unlock_spin(&umtx_lock);
1893 error = EINVAL;
1894 goto out;
1895 }
1896 if (su && PRI_MIN_REALTIME + ceiling < uq->uq_inherited_pri) {
1897 uq->uq_inherited_pri = PRI_MIN_REALTIME + ceiling;
1898 thread_lock(td);
1899 if (uq->uq_inherited_pri < UPRI(td))
1900 sched_lend_user_prio(td, uq->uq_inherited_pri);
1901 thread_unlock(td);
1902 }
1903 mtx_unlock_spin(&umtx_lock);
1904
1905 owner = casuword32(&m->m_owner,
1906 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
1907
1908 if (owner == UMUTEX_CONTESTED) {
1909 error = 0;
1910 break;
1911 }
1912
1913 /* The address was invalid. */
1914 if (owner == -1) {
1915 error = EFAULT;
1916 break;
1917 }
1918
1919 if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
1920 (owner & ~UMUTEX_CONTESTED) == id) {
1921 error = EDEADLK;
1922 break;
1923 }
1924
1925 if (try != 0) {
1926 error = EBUSY;
1927 break;
1928 }
1929
1930 /*
1931 * If we caught a signal, we have already retried, so now we
1932 * exit immediately.
1933 */
1934 if (error != 0)
1935 break;
1936
1937 umtxq_lock(&uq->uq_key);
1938 umtxq_insert(uq);
1939 umtxq_unbusy(&uq->uq_key);
1940 error = umtxq_sleep(uq, "umtxpp", timo);
1941 umtxq_remove(uq);
1942 umtxq_unlock(&uq->uq_key);
1943
1944 mtx_lock_spin(&umtx_lock);
1945 uq->uq_inherited_pri = old_inherited_pri;
1946 pri = PRI_MAX;
1947 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1948 uq2 = TAILQ_FIRST(&pi->pi_blocked);
1949 if (uq2 != NULL) {
1950 if (pri > UPRI(uq2->uq_thread))
1951 pri = UPRI(uq2->uq_thread);
1952 }
1953 }
1954 if (pri > uq->uq_inherited_pri)
1955 pri = uq->uq_inherited_pri;
1956 thread_lock(td);
1957 sched_unlend_user_prio(td, pri);
1958 thread_unlock(td);
1959 mtx_unlock_spin(&umtx_lock);
1960 }
1961
1962 if (error != 0) {
1963 mtx_lock_spin(&umtx_lock);
1964 uq->uq_inherited_pri = old_inherited_pri;
1965 pri = PRI_MAX;
1966 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
1967 uq2 = TAILQ_FIRST(&pi->pi_blocked);
1968 if (uq2 != NULL) {
1969 if (pri > UPRI(uq2->uq_thread))
1970 pri = UPRI(uq2->uq_thread);
1971 }
1972 }
1973 if (pri > uq->uq_inherited_pri)
1974 pri = uq->uq_inherited_pri;
1975 thread_lock(td);
1976 sched_unlend_user_prio(td, pri);
1977 thread_unlock(td);
1978 mtx_unlock_spin(&umtx_lock);
1979 }
1980
1981 out:
1982 umtxq_lock(&uq->uq_key);
1983 umtxq_unbusy(&uq->uq_key);
1984 umtxq_unlock(&uq->uq_key);
1985 umtx_key_release(&uq->uq_key);
1986 return (error);
1987 }
1988
1989 /*
1990 * Unlock a PP mutex.
1991 */
1992 static int
1993 do_unlock_pp(struct thread *td, struct umutex *m, uint32_t flags)
1994 {
1995 struct umtx_key key;
1996 struct umtx_q *uq, *uq2;
1997 struct umtx_pi *pi;
1998 uint32_t owner, id;
1999 uint32_t rceiling;
2000 int error, pri, new_inherited_pri, su;
2001
2002 id = td->td_tid;
2003 uq = td->td_umtxq;
2004 su = (priv_check(td, PRIV_SCHED_RTPRIO) == 0);
2005
2006 /*
2007 * Make sure we own this mtx.
2008 */
2009 owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
2010 if (owner == -1)
2011 return (EFAULT);
2012
2013 if ((owner & ~UMUTEX_CONTESTED) != id)
2014 return (EPERM);
2015
2016 error = copyin(&m->m_ceilings[1], &rceiling, sizeof(uint32_t));
2017 if (error != 0)
2018 return (error);
2019
2020 if (rceiling == -1)
2021 new_inherited_pri = PRI_MAX;
2022 else {
2023 rceiling = RTP_PRIO_MAX - rceiling;
2024 if (rceiling > RTP_PRIO_MAX)
2025 return (EINVAL);
2026 new_inherited_pri = PRI_MIN_REALTIME + rceiling;
2027 }
2028
2029 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2030 &key)) != 0)
2031 return (error);
2032 umtxq_lock(&key);
2033 umtxq_busy(&key);
2034 umtxq_unlock(&key);
2035 /*
2036 * For a priority-protected mutex, always set the unlocked state
2037 * to UMUTEX_CONTESTED, so that userland always enters the kernel
2038 * to lock the mutex. This is necessary because thread priority
2039 * has to be adjusted for such a mutex.
2040 */
2041 error = suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2042 UMUTEX_CONTESTED);
2043
2044 umtxq_lock(&key);
2045 if (error == 0)
2046 umtxq_signal(&key, 1);
2047 umtxq_unbusy(&key);
2048 umtxq_unlock(&key);
2049
2050 if (error == -1)
2051 error = EFAULT;
2052 else {
2053 mtx_lock_spin(&umtx_lock);
2054 if (su != 0)
2055 uq->uq_inherited_pri = new_inherited_pri;
2056 pri = PRI_MAX;
2057 TAILQ_FOREACH(pi, &uq->uq_pi_contested, pi_link) {
2058 uq2 = TAILQ_FIRST(&pi->pi_blocked);
2059 if (uq2 != NULL) {
2060 if (pri > UPRI(uq2->uq_thread))
2061 pri = UPRI(uq2->uq_thread);
2062 }
2063 }
2064 if (pri > uq->uq_inherited_pri)
2065 pri = uq->uq_inherited_pri;
2066 thread_lock(td);
2067 sched_unlend_user_prio(td, pri);
2068 thread_unlock(td);
2069 mtx_unlock_spin(&umtx_lock);
2070 }
2071 umtx_key_release(&key);
2072 return (error);
2073 }
2074
2075 static int
2076 do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
2077 uint32_t *old_ceiling)
2078 {
2079 struct umtx_q *uq;
2080 uint32_t save_ceiling;
2081 uint32_t owner, id;
2082 uint32_t flags;
2083 int error;
2084
2085 flags = fuword32(&m->m_flags);
2086 if ((flags & UMUTEX_PRIO_PROTECT) == 0)
2087 return (EINVAL);
2088 if (ceiling > RTP_PRIO_MAX)
2089 return (EINVAL);
2090 id = td->td_tid;
2091 uq = td->td_umtxq;
2092 if ((error = umtx_key_get(m, TYPE_PP_UMUTEX, GET_SHARE(flags),
2093 &uq->uq_key)) != 0)
2094 return (error);
2095 for (;;) {
2096 umtxq_lock(&uq->uq_key);
2097 umtxq_busy(&uq->uq_key);
2098 umtxq_unlock(&uq->uq_key);
2099
2100 save_ceiling = fuword32(&m->m_ceilings[0]);
2101
2102 owner = casuword32(&m->m_owner,
2103 UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
2104
2105 if (owner == UMUTEX_CONTESTED) {
2106 suword32(&m->m_ceilings[0], ceiling);
2107 suword32(__DEVOLATILE(uint32_t *, &m->m_owner),
2108 UMUTEX_CONTESTED);
2109 error = 0;
2110 break;
2111 }
2112
2113 /* The address was invalid. */
2114 if (owner == -1) {
2115 error = EFAULT;
2116 break;
2117 }
2118
2119 if ((owner & ~UMUTEX_CONTESTED) == id) {
2120 suword32(&m->m_ceilings[0], ceiling);
2121 error = 0;
2122 break;
2123 }
2124
2125 /*
2126 * If we caught a signal, we have already retried, so now we
2127 * exit immediately.
2128 */
2129 if (error != 0)
2130 break;
2131
2132 /*
2133 * If we set the contested bit, sleep. Otherwise the lock changed,
2134 * and we either need to retry or we lost a race to the thread
2135 * unlocking the umtx.
2136 */
2137 umtxq_lock(&uq->uq_key);
2138 umtxq_insert(uq);
2139 umtxq_unbusy(&uq->uq_key);
2140 error = umtxq_sleep(uq, "umtxpp", 0);
2141 umtxq_remove(uq);
2142 umtxq_unlock(&uq->uq_key);
2143 }
2144 umtxq_lock(&uq->uq_key);
2145 if (error == 0)
2146 umtxq_signal(&uq->uq_key, INT_MAX);
2147 umtxq_unbusy(&uq->uq_key);
2148 umtxq_unlock(&uq->uq_key);
2149 umtx_key_release(&uq->uq_key);
2150 if (error == 0 && old_ceiling != NULL)
2151 suword32(old_ceiling, save_ceiling);
2152 return (error);
2153 }
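/*
 * Illustrative sketch (not part of the build): the POSIX interface to the
 * priority-protect path above. The ceiling stored in m_ceilings[0] is
 * exactly what do_set_ceiling() manipulates. pp_demo() is a hypothetical
 * name and error handling is elided.
 */
#if 0
#include <pthread.h>

static void
pp_demo(pthread_mutex_t *m)
{
        pthread_mutexattr_t attr;
        int old;

        pthread_mutexattr_init(&attr);
        pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_PROTECT);
        pthread_mutexattr_setprioceiling(&attr, 10);
        pthread_mutex_init(m, &attr);
        pthread_mutexattr_destroy(&attr);

        /* Changing the ceiling at run time ends up in do_set_ceiling(). */
        pthread_mutex_setprioceiling(m, 20, &old);
}
#endif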
2154
2155 static int
2156 _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
2157 int mode)
2158 {
2159 switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2160 case 0:
2161 return (_do_lock_normal(td, m, flags, timo, mode));
2162 case UMUTEX_PRIO_INHERIT:
2163 return (_do_lock_pi(td, m, flags, timo, mode));
2164 case UMUTEX_PRIO_PROTECT:
2165 return (_do_lock_pp(td, m, flags, timo, mode));
2166 }
2167 return (EINVAL);
2168 }
2169
2170 /*
2171 * Lock a userland POSIX mutex.
2172 */
2173 static int
2174 do_lock_umutex(struct thread *td, struct umutex *m,
2175 struct timespec *timeout, int mode)
2176 {
2177 struct timespec ts, ts2, ts3;
2178 struct timeval tv;
2179 uint32_t flags;
2180 int error;
2181
2182 flags = fuword32(&m->m_flags);
2183 if (flags == -1)
2184 return (EFAULT);
2185
2186 if (timeout == NULL) {
2187 error = _do_lock_umutex(td, m, flags, 0, mode);
2188 /* Mutex locking is restarted if it is interrupted. */
2189 if (error == EINTR && mode != _UMUTEX_WAIT)
2190 error = ERESTART;
2191 } else {
2192 getnanouptime(&ts);
2193 timespecadd(&ts, timeout);
2194 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2195 for (;;) {
2196 error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
2197 if (error != ETIMEDOUT)
2198 break;
2199 getnanouptime(&ts2);
2200 if (timespeccmp(&ts2, &ts, >=)) {
2201 error = ETIMEDOUT;
2202 break;
2203 }
2204 ts3 = ts;
2205 timespecsub(&ts3, &ts2);
2206 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2207 }
2208 /* Timed-locking is not restarted. */
2209 if (error == ERESTART)
2210 error = EINTR;
2211 }
2212 return (error);
2213 }
2214
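/*
 * Illustrative sketch (the real wrappers live in libthr): a timed
 * lock request reaches do_lock_umutex() from userland roughly as
 *
 *	struct timespec to = { 1, 0 };
 *	_umtx_op(&mtx, UMTX_OP_MUTEX_LOCK, 0, NULL, &to);
 *
 * where mtx is a struct umutex initialized to UMUTEX_UNOWNED and
 * the timeout is relative.
 */
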
2215 /*
2216 * Unlock a userland POSIX mutex.
2217 */
2218 static int
2219 do_unlock_umutex(struct thread *td, struct umutex *m)
2220 {
2221 uint32_t flags;
2222
2223 flags = fuword32(&m->m_flags);
2224 if (flags == -1)
2225 return (EFAULT);
2226
2227 switch (flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
2228 case 0:
2229 return (do_unlock_normal(td, m, flags));
2230 case UMUTEX_PRIO_INHERIT:
2231 return (do_unlock_pi(td, m, flags));
2232 case UMUTEX_PRIO_PROTECT:
2233 return (do_unlock_pp(td, m, flags));
2234 }
2235
2236 return (EINVAL);
2237 }
2238
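/*
 * Wait on a userland condition variable, atomically releasing the
 * associated userland mutex before sleeping.
 */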
2239 static int
2240 do_cv_wait(struct thread *td, struct ucond *cv, struct umutex *m,
2241 struct timespec *timeout, u_long wflags)
2242 {
2243 struct umtx_q *uq;
2244 struct timeval tv;
2245 struct timespec cts, ets, tts;
2246 uint32_t flags;
2247 int error;
2248
2249 uq = td->td_umtxq;
2250 flags = fuword32(&cv->c_flags);
2251 error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &uq->uq_key);
2252 if (error != 0)
2253 return (error);
2254 umtxq_lock(&uq->uq_key);
2255 umtxq_busy(&uq->uq_key);
2256 umtxq_insert(uq);
2257 umtxq_unlock(&uq->uq_key);
2258
2259 /*
2260 * It is critical that c_has_waiters be set to 1 before the
2261 * user mutex is released, so a signalling thread cannot
2262 * miss this waiter.
2263 */
2263 suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 1);
2264
2265 umtxq_lock(&uq->uq_key);
2266 umtxq_unbusy(&uq->uq_key);
2267 umtxq_unlock(&uq->uq_key);
2268
2269 error = do_unlock_umutex(td, m);
2270
2271 umtxq_lock(&uq->uq_key);
2272 if (error == 0) {
2273 if ((wflags & UMTX_CHECK_UNPARKING) &&
2274 (td->td_pflags & TDP_WAKEUP)) {
2275 td->td_pflags &= ~TDP_WAKEUP;
2276 error = EINTR;
2277 } else if (timeout == NULL) {
2278 error = umtxq_sleep(uq, "ucond", 0);
2279 } else {
2280 getnanouptime(&ets);
2281 timespecadd(&ets, timeout);
2282 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2283 for (;;) {
2284 error = umtxq_sleep(uq, "ucond", tvtohz(&tv));
2285 if (error != ETIMEDOUT)
2286 break;
2287 getnanouptime(&cts);
2288 if (timespeccmp(&cts, &ets, >=)) {
2289 error = ETIMEDOUT;
2290 break;
2291 }
2292 tts = ets;
2293 timespecsub(&tts, &cts);
2294 TIMESPEC_TO_TIMEVAL(&tv, &tts);
2295 }
2296 }
2297 }
2298
2299 if (error != 0) {
2300 if ((uq->uq_flags & UQF_UMTXQ) == 0) {
2301 /*
2302 * If we were concurrently signalled by do_cv_signal()
2303 * but are returning due to an error, a UNIX signal or
2304 * a timeout, perform another umtxq_signal() so the
2305 * wakeup is not consumed.  This may cause a spurious
2306 * wakeup of another thread which was just queued, but
2307 * SUSv3 explicitly allows spurious wakeups to occur,
2308 * and indeed a kernel-based implementation cannot
2309 * avoid them.
2310 */
2311 if (!umtxq_signal(&uq->uq_key, 1))
2312 error = 0;
2313 }
2314 if (error == ERESTART)
2315 error = EINTR;
2316 }
2317 umtxq_remove(uq);
2318 umtxq_unlock(&uq->uq_key);
2319 umtx_key_release(&uq->uq_key);
2320 return (error);
2321 }
2322
2323 /*
2324 * Signal a userland condition variable.
2325 */
2326 static int
2327 do_cv_signal(struct thread *td, struct ucond *cv)
2328 {
2329 struct umtx_key key;
2330 int error, cnt, nwake;
2331 uint32_t flags;
2332
2333 flags = fuword32(&cv->c_flags);
2334 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2335 return (error);
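/*
 * Wake at most one waiter; if that empties the queue, clear
 * c_has_waiters so userland can skip the syscall next time.
 */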
2336 umtxq_lock(&key);
2337 umtxq_busy(&key);
2338 cnt = umtxq_count(&key);
2339 nwake = umtxq_signal(&key, 1);
2340 if (cnt <= nwake) {
2341 umtxq_unlock(&key);
2342 error = suword32(
2343 __DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2344 umtxq_lock(&key);
2345 }
2346 umtxq_unbusy(&key);
2347 umtxq_unlock(&key);
2348 umtx_key_release(&key);
2349 return (error);
2350 }
2351
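/*
 * Broadcast a userland condition variable: wake all waiters and
 * clear c_has_waiters.
 */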
2352 static int
2353 do_cv_broadcast(struct thread *td, struct ucond *cv)
2354 {
2355 struct umtx_key key;
2356 int error;
2357 uint32_t flags;
2358
2359 flags = fuword32(&cv->c_flags);
2360 if ((error = umtx_key_get(cv, TYPE_CV, GET_SHARE(flags), &key)) != 0)
2361 return (error);
2362
2363 umtxq_lock(&key);
2364 umtxq_busy(&key);
2365 umtxq_signal(&key, INT_MAX);
2366 umtxq_unlock(&key);
2367
2368 error = suword32(__DEVOLATILE(uint32_t *, &cv->c_has_waiters), 0);
2369
2370 umtxq_lock(&key);
2371 umtxq_unbusy(&key);
2372 umtxq_unlock(&key);
2373
2374 umtx_key_release(&key);
2375 return (error);
2376 }
2377
2378 static int
2379 do_rw_rdlock(struct thread *td, struct urwlock *rwlock, long fflag, int timo)
2380 {
2381 struct umtx_q *uq;
2382 uint32_t flags, wrflags;
2383 int32_t state, oldstate;
2384 int32_t blocked_readers;
2385 int error;
2386
2387 uq = td->td_umtxq;
2388 flags = fuword32(&rwlock->rw_flags);
2389 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2390 if (error != 0)
2391 return (error);
2392
2393 wrflags = URWLOCK_WRITE_OWNER;
2394 if (!(fflag & URWLOCK_PREFER_READER) && !(flags & URWLOCK_PREFER_READER))
2395 wrflags |= URWLOCK_WRITE_WAITERS;
2396
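/*
 * Fast path: take a read slot with a userland CAS.  Only when a
 * writer (or, unless reader-preferring, a waiting writer) blocks
 * us do we fall back to the kernel sleep queue.
 */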
2397 for (;;) {
2398 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2399 /* try to lock it */
2400 while (!(state & wrflags)) {
2401 if (__predict_false(URWLOCK_READER_COUNT(state) == URWLOCK_MAX_READERS)) {
2402 umtx_key_release(&uq->uq_key);
2403 return (EAGAIN);
2404 }
2405 oldstate = casuword32(&rwlock->rw_state, state, state + 1);
2406 if (oldstate == state) {
2407 umtx_key_release(&uq->uq_key);
2408 return (0);
2409 }
2410 state = oldstate;
2411 }
2412
2413 if (error)
2414 break;
2415
2416 /* grab monitor lock */
2417 umtxq_lock(&uq->uq_key);
2418 umtxq_busy(&uq->uq_key);
2419 umtxq_unlock(&uq->uq_key);
2420
2421 /*
2422 * re-read the state, in case it changed between the try-lock above
2423 * and the check below
2424 */
2425 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2426
2427 /* set read contention bit */
2428 while ((state & wrflags) && !(state & URWLOCK_READ_WAITERS)) {
2429 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_READ_WAITERS);
2430 if (oldstate == state)
2431 goto sleep;
2432 state = oldstate;
2433 }
2434
2435 /* The state changed while we were setting the flag; restart. */
2436 if (!(state & wrflags)) {
2437 umtxq_lock(&uq->uq_key);
2438 umtxq_unbusy(&uq->uq_key);
2439 umtxq_unlock(&uq->uq_key);
2440 continue;
2441 }
2442
2443 sleep:
2444 /* The contention bit is set; increase the read waiter count before sleeping. */
2445 blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2446 suword32(&rwlock->rw_blocked_readers, blocked_readers+1);
2447
2448 while (state & wrflags) {
2449 umtxq_lock(&uq->uq_key);
2450 umtxq_insert(uq);
2451 umtxq_unbusy(&uq->uq_key);
2452
2453 error = umtxq_sleep(uq, "urdlck", timo);
2454
2455 umtxq_busy(&uq->uq_key);
2456 umtxq_remove(uq);
2457 umtxq_unlock(&uq->uq_key);
2458 if (error)
2459 break;
2460 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2461 }
2462
2463 /* Decrease the read waiter count; the last waiter clears the read contention bit. */
2464 blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2465 suword32(&rwlock->rw_blocked_readers, blocked_readers-1);
2466 if (blocked_readers == 1) {
2467 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2468 for (;;) {
2469 oldstate = casuword32(&rwlock->rw_state, state,
2470 state & ~URWLOCK_READ_WAITERS);
2471 if (oldstate == state)
2472 break;
2473 state = oldstate;
2474 }
2475 }
2476
2477 umtxq_lock(&uq->uq_key);
2478 umtxq_unbusy(&uq->uq_key);
2479 umtxq_unlock(&uq->uq_key);
2480 }
2481 umtx_key_release(&uq->uq_key);
2482 return (error);
2483 }
2484
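/*
 * Timed read-lock: retry do_rw_rdlock() with the time remaining
 * until the absolute deadline derived from the relative timeout.
 */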
2485 static int
2486 do_rw_rdlock2(struct thread *td, void *obj, long val, struct timespec *timeout)
2487 {
2488 struct timespec ts, ts2, ts3;
2489 struct timeval tv;
2490 int error;
2491
2492 getnanouptime(&ts);
2493 timespecadd(&ts, timeout);
2494 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2495 for (;;) {
2496 error = do_rw_rdlock(td, obj, val, tvtohz(&tv));
2497 if (error != ETIMEDOUT)
2498 break;
2499 getnanouptime(&ts2);
2500 if (timespeccmp(&ts2, &ts, >=)) {
2501 error = ETIMEDOUT;
2502 break;
2503 }
2504 ts3 = ts;
2505 timespecsub(&ts3, &ts2);
2506 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2507 }
2508 if (error == ERESTART)
2509 error = EINTR;
2510 return (error);
2511 }
2512
2513 static int
2514 do_rw_wrlock(struct thread *td, struct urwlock *rwlock, int timo)
2515 {
2516 struct umtx_q *uq;
2517 uint32_t flags;
2518 int32_t state, oldstate;
2519 int32_t blocked_writers;
2520 int32_t blocked_readers;
2521 int error;
2522
2523 uq = td->td_umtxq;
2524 flags = fuword32(&rwlock->rw_flags);
2525 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2526 if (error != 0)
2527 return (error);
2528
2529 blocked_readers = 0;
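/*
 * Fast path: claim URWLOCK_WRITE_OWNER with a userland CAS when
 * the lock is idle; otherwise set URWLOCK_WRITE_WAITERS and
 * sleep.  If we give up with an error, wake any readers we may
 * have been starving.
 */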
2530 for (;;) {
2531 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2532 while (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2533 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_OWNER);
2534 if (oldstate == state) {
2535 umtx_key_release(&uq->uq_key);
2536 return (0);
2537 }
2538 state = oldstate;
2539 }
2540
2541 if (error) {
2542 if (!(state & (URWLOCK_WRITE_OWNER|URWLOCK_WRITE_WAITERS)) &&
2543 blocked_readers != 0) {
2544 umtxq_lock(&uq->uq_key);
2545 umtxq_busy(&uq->uq_key);
2546 umtxq_signal_queue(&uq->uq_key, INT_MAX, UMTX_SHARED_QUEUE);
2547 umtxq_unbusy(&uq->uq_key);
2548 umtxq_unlock(&uq->uq_key);
2549 }
2550
2551 break;
2552 }
2553
2554 /* grab monitor lock */
2555 umtxq_lock(&uq->uq_key);
2556 umtxq_busy(&uq->uq_key);
2557 umtxq_unlock(&uq->uq_key);
2558
2559 /*
2560 * re-read the state, in case it changed between the try-lock above
2561 * and the check below
2562 */
2563 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2564
2565 while (((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) &&
2566 (state & URWLOCK_WRITE_WAITERS) == 0) {
2567 oldstate = casuword32(&rwlock->rw_state, state, state | URWLOCK_WRITE_WAITERS);
2568 if (oldstate == state)
2569 goto sleep;
2570 state = oldstate;
2571 }
2572
2573 if (!(state & URWLOCK_WRITE_OWNER) && URWLOCK_READER_COUNT(state) == 0) {
2574 umtxq_lock(&uq->uq_key);
2575 umtxq_unbusy(&uq->uq_key);
2576 umtxq_unlock(&uq->uq_key);
2577 continue;
2578 }
2579 sleep:
2580 blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2581 suword32(&rwlock->rw_blocked_writers, blocked_writers+1);
2582
2583 while ((state & URWLOCK_WRITE_OWNER) || URWLOCK_READER_COUNT(state) != 0) {
2584 umtxq_lock(&uq->uq_key);
2585 umtxq_insert_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2586 umtxq_unbusy(&uq->uq_key);
2587
2588 error = umtxq_sleep(uq, "uwrlck", timo);
2589
2590 umtxq_busy(&uq->uq_key);
2591 umtxq_remove_queue(uq, UMTX_EXCLUSIVE_QUEUE);
2592 umtxq_unlock(&uq->uq_key);
2593 if (error)
2594 break;
2595 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2596 }
2597
2598 blocked_writers = fuword32(&rwlock->rw_blocked_writers);
2599 suword32(&rwlock->rw_blocked_writers, blocked_writers-1);
2600 if (blocked_writers == 1) {
2601 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2602 for (;;) {
2603 oldstate = casuword32(&rwlock->rw_state, state,
2604 state & ~URWLOCK_WRITE_WAITERS);
2605 if (oldstate == state)
2606 break;
2607 state = oldstate;
2608 }
2609 blocked_readers = fuword32(&rwlock->rw_blocked_readers);
2610 } else
2611 blocked_readers = 0;
2612
2613 umtxq_lock(&uq->uq_key);
2614 umtxq_unbusy(&uq->uq_key);
2615 umtxq_unlock(&uq->uq_key);
2616 }
2617
2618 umtx_key_release(&uq->uq_key);
2619 return (error);
2620 }
2621
2622 static int
2623 do_rw_wrlock2(struct thread *td, void *obj, struct timespec *timeout)
2624 {
2625 struct timespec ts, ts2, ts3;
2626 struct timeval tv;
2627 int error;
2628
2629 getnanouptime(&ts);
2630 timespecadd(&ts, timeout);
2631 TIMESPEC_TO_TIMEVAL(&tv, timeout);
2632 for (;;) {
2633 error = do_rw_wrlock(td, obj, tvtohz(&tv));
2634 if (error != ETIMEDOUT)
2635 break;
2636 getnanouptime(&ts2);
2637 if (timespeccmp(&ts2, &ts, >=)) {
2638 error = ETIMEDOUT;
2639 break;
2640 }
2641 ts3 = ts;
2642 timespecsub(&ts3, &ts2);
2643 TIMESPEC_TO_TIMEVAL(&tv, &ts3);
2644 }
2645 if (error == ERESTART)
2646 error = EINTR;
2647 return (error);
2648 }
2649
2650 static int
2651 do_rw_unlock(struct thread *td, struct urwlock *rwlock)
2652 {
2653 struct umtx_q *uq;
2654 uint32_t flags;
2655 int32_t state, oldstate;
2656 int error, q, count;
2657
2658 uq = td->td_umtxq;
2659 flags = fuword32(&rwlock->rw_flags);
2660 error = umtx_key_get(rwlock, TYPE_RWLOCK, GET_SHARE(flags), &uq->uq_key);
2661 if (error != 0)
2662 return (error);
2663
2664 state = fuword32(__DEVOLATILE(int32_t *, &rwlock->rw_state));
2665 if (state & URWLOCK_WRITE_OWNER) {
2666 for (;;) {
2667 oldstate = casuword32(&rwlock->rw_state, state,
2668 state & ~URWLOCK_WRITE_OWNER);
2669 if (oldstate != state) {
2670 state = oldstate;
2671 if (!(oldstate & URWLOCK_WRITE_OWNER)) {
2672 error = EPERM;
2673 goto out;
2674 }
2675 } else
2676 break;
2677 }
2678 } else if (URWLOCK_READER_COUNT(state) != 0) {
2679 for (;;) {
2680 oldstate = casuword32(&rwlock->rw_state, state,
2681 state - 1);
2682 if (oldstate != state) {
2683 state = oldstate;
2684 if (URWLOCK_READER_COUNT(oldstate) == 0) {
2685 error = EPERM;
2686 goto out;
2687 }
2688 } else
2690 break;
2691 }
2692 } else {
2693 error = EPERM;
2694 goto out;
2695 }
2696
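/*
 * Ownership has been released; choose which sleep queue to wake.
 * Writers are preferred unless URWLOCK_PREFER_READER is set; a
 * single writer is woken, while readers are woken en masse.
 */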
2697 count = 0;
2698
2699 if (!(flags & URWLOCK_PREFER_READER)) {
2700 if (state & URWLOCK_WRITE_WAITERS) {
2701 count = 1;
2702 q = UMTX_EXCLUSIVE_QUEUE;
2703 } else if (state & URWLOCK_READ_WAITERS) {
2704 count = INT_MAX;
2705 q = UMTX_SHARED_QUEUE;
2706 }
2707 } else {
2708 if (state & URWLOCK_READ_WAITERS) {
2709 count = INT_MAX;
2710 q = UMTX_SHARED_QUEUE;
2711 } else if (state & URWLOCK_WRITE_WAITERS) {
2712 count = 1;
2713 q = UMTX_EXCLUSIVE_QUEUE;
2714 }
2715 }
2716
2717 if (count) {
2718 umtxq_lock(&uq->uq_key);
2719 umtxq_busy(&uq->uq_key);
2720 umtxq_signal_queue(&uq->uq_key, count, q);
2721 umtxq_unbusy(&uq->uq_key);
2722 umtxq_unlock(&uq->uq_key);
2723 }
2724 out:
2725 umtx_key_release(&uq->uq_key);
2726 return (error);
2727 }
2728
2729 int
2730 _umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
2731 /* struct umtx *umtx */
2732 {
2733 return _do_lock_umtx(td, uap->umtx, td->td_tid, 0);
2734 }
2735
2736 int
2737 _umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
2738 /* struct umtx *umtx */
2739 {
2740 return do_unlock_umtx(td, uap->umtx, td->td_tid);
2741 }
2742
2743 static int
2744 __umtx_op_lock_umtx(struct thread *td, struct _umtx_op_args *uap)
2745 {
2746 struct timespec *ts, timeout;
2747 int error;
2748
2749 /* Allow a null timespec (wait forever). */
2750 if (uap->uaddr2 == NULL)
2751 ts = NULL;
2752 else {
2753 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2754 if (error != 0)
2755 return (error);
2756 if (timeout.tv_nsec >= 1000000000 ||
2757 timeout.tv_nsec < 0) {
2758 return (EINVAL);
2759 }
2760 ts = &timeout;
2761 }
2762 return (do_lock_umtx(td, uap->obj, uap->val, ts));
2763 }
2764
2765 static int
2766 __umtx_op_unlock_umtx(struct thread *td, struct _umtx_op_args *uap)
2767 {
2768 return (do_unlock_umtx(td, uap->obj, uap->val));
2769 }
2770
2771 static int
2772 __umtx_op_wait(struct thread *td, struct _umtx_op_args *uap)
2773 {
2774 struct timespec *ts, timeout;
2775 int error;
2776
2777 if (uap->uaddr2 == NULL)
2778 ts = NULL;
2779 else {
2780 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2781 if (error != 0)
2782 return (error);
2783 if (timeout.tv_nsec >= 1000000000 ||
2784 timeout.tv_nsec < 0)
2785 return (EINVAL);
2786 ts = &timeout;
2787 }
2788 return do_wait(td, uap->obj, uap->val, ts, 0, 0);
2789 }
2790
2791 static int
2792 __umtx_op_wait_uint(struct thread *td, struct _umtx_op_args *uap)
2793 {
2794 struct timespec *ts, timeout;
2795 int error;
2796
2797 if (uap->uaddr2 == NULL)
2798 ts = NULL;
2799 else {
2800 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2801 if (error != 0)
2802 return (error);
2803 if (timeout.tv_nsec >= 1000000000 ||
2804 timeout.tv_nsec < 0)
2805 return (EINVAL);
2806 ts = &timeout;
2807 }
2808 return do_wait(td, uap->obj, uap->val, ts, 1, 0);
2809 }
2810
2811 static int
2812 __umtx_op_wait_uint_private(struct thread *td, struct _umtx_op_args *uap)
2813 {
2814 struct timespec *ts, timeout;
2815 int error;
2816
2817 if (uap->uaddr2 == NULL)
2818 ts = NULL;
2819 else {
2820 error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
2821 if (error != 0)
2822 return (error);
2823 if (timeout.tv_nsec >= 1000000000 ||
2824 timeout.tv_nsec < 0)
2825 return (EINVAL);
2826 ts = &timeout;
2827 }
2828 return do_wait(td, uap->obj, uap->val, ts, 1, 1);
2829 }
2830
2831 static int
2832 __umtx_op_wake(struct thread *td, struct _umtx_op_args *uap)
2833 {
2834 return (kern_umtx_wake(td, uap->obj, uap->val, 0));
2835 }
2836
2837 static int
2838 __umtx_op_wake_private(struct thread *td, struct _umtx_op_args *uap)
2839 {
2840 return (kern_umtx_wake(td, uap->obj, uap->val, 1));
2841 }
2842
2843 static int
2844 __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
2845 {
2846 struct timespec *ts, timeout;
2847 int error;
2848
2849 /* Allow a null timespec (wait forever). */
2850 if (uap->uaddr2 == NULL)
2851 ts = NULL;
2852 else {
2853 error = copyin(uap->uaddr2, &timeout,
2854 sizeof(timeout));
2855 if (error != 0)
2856 return (error);
2857 if (timeout.tv_nsec >= 1000000000 ||
2858 timeout.tv_nsec < 0) {
2859 return (EINVAL);
2860 }
2861 ts = &timeout;
2862 }
2863 return do_lock_umutex(td, uap->obj, ts, 0);
2864 }
2865
2866 static int
2867 __umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
2868 {
2869 return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
2870 }
2871
2872 static int
2873 __umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
2874 {
2875 struct timespec *ts, timeout;
2876 int error;
2877
2878 /* Allow a null timespec (wait forever). */
2879 if (uap->uaddr2 == NULL)
2880 ts = NULL;
2881 else {
2882 error = copyin(uap->uaddr2, &timeout,
2883 sizeof(timeout));
2884 if (error != 0)
2885 return (error);
2886 if (timeout.tv_nsec >= 1000000000 ||
2887 timeout.tv_nsec < 0) {
2888 return (EINVAL);
2889 }
2890 ts = &timeout;
2891 }
2892 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
2893 }
2894
2895 static int
2896 __umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
2897 {
2898 return do_wake_umutex(td, uap->obj);
2899 }
2900
2901 static int
2902 __umtx_op_unlock_umutex(struct thread *td, struct _umtx_op_args *uap)
2903 {
2904 return do_unlock_umutex(td, uap->obj);
2905 }
2906
2907 static int
2908 __umtx_op_set_ceiling(struct thread *td, struct _umtx_op_args *uap)
2909 {
2910 return do_set_ceiling(td, uap->obj, uap->val, uap->uaddr1);
2911 }
2912
2913 static int
2914 __umtx_op_cv_wait(struct thread *td, struct _umtx_op_args *uap)
2915 {
2916 struct timespec *ts, timeout;
2917 int error;
2918
2919 /* Allow a null timespec (wait forever). */
2920 if (uap->uaddr2 == NULL)
2921 ts = NULL;
2922 else {
2923 error = copyin(uap->uaddr2, &timeout,
2924 sizeof(timeout));
2925 if (error != 0)
2926 return (error);
2927 if (timeout.tv_nsec >= 1000000000 ||
2928 timeout.tv_nsec < 0) {
2929 return (EINVAL);
2930 }
2931 ts = &timeout;
2932 }
2933 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
2934 }
2935
2936 static int
2937 __umtx_op_cv_signal(struct thread *td, struct _umtx_op_args *uap)
2938 {
2939 return do_cv_signal(td, uap->obj);
2940 }
2941
2942 static int
2943 __umtx_op_cv_broadcast(struct thread *td, struct _umtx_op_args *uap)
2944 {
2945 return do_cv_broadcast(td, uap->obj);
2946 }
2947
2948 static int
2949 __umtx_op_rw_rdlock(struct thread *td, struct _umtx_op_args *uap)
2950 {
2951 struct timespec timeout;
2952 int error;
2953
2954 /* Allow a null timespec (wait forever). */
2955 if (uap->uaddr2 == NULL) {
2956 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
2957 } else {
2958 error = copyin(uap->uaddr2, &timeout,
2959 sizeof(timeout));
2960 if (error != 0)
2961 return (error);
2962 if (timeout.tv_nsec >= 1000000000 ||
2963 timeout.tv_nsec < 0) {
2964 return (EINVAL);
2965 }
2966 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
2967 }
2968 return (error);
2969 }
2970
2971 static int
2972 __umtx_op_rw_wrlock(struct thread *td, struct _umtx_op_args *uap)
2973 {
2974 struct timespec timeout;
2975 int error;
2976
2977 /* Allow a null timespec (wait forever). */
2978 if (uap->uaddr2 == NULL) {
2979 error = do_rw_wrlock(td, uap->obj, 0);
2980 } else {
2981 error = copyin(uap->uaddr2, &timeout,
2982 sizeof(timeout));
2983 if (error != 0)
2984 return (error);
2985 if (timeout.tv_nsec >= 1000000000 ||
2986 timeout.tv_nsec < 0) {
2987 return (EINVAL);
2988 }
2989
2990 error = do_rw_wrlock2(td, uap->obj, &timeout);
2991 }
2992 return (error);
2993 }
2994
2995 static int
2996 __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
2997 {
2998 return do_rw_unlock(td, uap->obj);
2999 }
3000
3001 typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
3002
3003 static _umtx_op_func op_table[] = {
3004 __umtx_op_lock_umtx, /* UMTX_OP_LOCK */
3005 __umtx_op_unlock_umtx, /* UMTX_OP_UNLOCK */
3006 __umtx_op_wait, /* UMTX_OP_WAIT */
3007 __umtx_op_wake, /* UMTX_OP_WAKE */
3008 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */
3009 __umtx_op_lock_umutex, /* UMTX_OP_MUTEX_LOCK */
3010 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */
3011 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */
3012 __umtx_op_cv_wait, /* UMTX_OP_CV_WAIT */
3013 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */
3014 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */
3015 __umtx_op_wait_uint, /* UMTX_OP_WAIT_UINT */
3016 __umtx_op_rw_rdlock, /* UMTX_OP_RW_RDLOCK */
3017 __umtx_op_rw_wrlock, /* UMTX_OP_RW_WRLOCK */
3018 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */
3019 __umtx_op_wait_uint_private, /* UMTX_OP_WAIT_UINT_PRIVATE */
3020 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */
3021 __umtx_op_wait_umutex, /* UMTX_OP_UMUTEX_WAIT */
3022 __umtx_op_wake_umutex /* UMTX_OP_UMUTEX_WAKE */
3023 };
3024
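/*
 * System call entry: validate the operation number and dispatch
 * through op_table.  An illustrative sketch of the WAIT/WAKE pair
 * as seen from userland (the real wrappers live in libthr; addr
 * and val are hypothetical):
 *
 *	u_long val = ...;	(value *addr is believed to hold)
 *	_umtx_op(addr, UMTX_OP_WAIT_UINT, val, NULL, NULL);
 *	...
 *	_umtx_op(addr, UMTX_OP_WAKE, 1, NULL, NULL);	(wake one waiter)
 */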
3025 int
3026 _umtx_op(struct thread *td, struct _umtx_op_args *uap)
3027 {
3028 if ((unsigned)uap->op < UMTX_OP_MAX)
3029 return (*op_table[uap->op])(td, uap);
3030 return (EINVAL);
3031 }
3032
3033 #ifdef COMPAT_IA32
3034 int
3035 freebsd32_umtx_lock(struct thread *td, struct freebsd32_umtx_lock_args *uap)
3036 /* struct umtx *umtx */
3037 {
3038 return (do_lock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid, NULL));
3039 }
3040
3041 int
3042 freebsd32_umtx_unlock(struct thread *td, struct freebsd32_umtx_unlock_args *uap)
3043 /* struct umtx *umtx */
3044 {
3045 return (do_unlock_umtx32(td, (uint32_t *)uap->umtx, td->td_tid));
3046 }
3047
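/*
 * 32-bit processes pass a packed timespec; copy it in and widen
 * the fields before handing it to the common code.
 */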
3048 struct timespec32 {
3049 u_int32_t tv_sec;
3050 u_int32_t tv_nsec;
3051 };
3052
3053 static inline int
3054 copyin_timeout32(void *addr, struct timespec *tsp)
3055 {
3056 struct timespec32 ts32;
3057 int error;
3058
3059 error = copyin(addr, &ts32, sizeof(struct timespec32));
3060 if (error == 0) {
3061 tsp->tv_sec = ts32.tv_sec;
3062 tsp->tv_nsec = ts32.tv_nsec;
3063 }
3064 return (error);
3065 }
3066
3067 static int
3068 __umtx_op_lock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3069 {
3070 struct timespec *ts, timeout;
3071 int error;
3072
3073 /* Allow a null timespec (wait forever). */
3074 if (uap->uaddr2 == NULL)
3075 ts = NULL;
3076 else {
3077 error = copyin_timeout32(uap->uaddr2, &timeout);
3078 if (error != 0)
3079 return (error);
3080 if (timeout.tv_nsec >= 1000000000 ||
3081 timeout.tv_nsec < 0) {
3082 return (EINVAL);
3083 }
3084 ts = &timeout;
3085 }
3086 return (do_lock_umtx32(td, uap->obj, uap->val, ts));
3087 }
3088
3089 static int
3090 __umtx_op_unlock_umtx_compat32(struct thread *td, struct _umtx_op_args *uap)
3091 {
3092 return (do_unlock_umtx32(td, uap->obj, (uint32_t)uap->val));
3093 }
3094
3095 static int
3096 __umtx_op_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3097 {
3098 struct timespec *ts, timeout;
3099 int error;
3100
3101 if (uap->uaddr2 == NULL)
3102 ts = NULL;
3103 else {
3104 error = copyin_timeout32(uap->uaddr2, &timeout);
3105 if (error != 0)
3106 return (error);
3107 if (timeout.tv_nsec >= 1000000000 ||
3108 timeout.tv_nsec < 0)
3109 return (EINVAL);
3110 ts = &timeout;
3111 }
3112 return do_wait(td, uap->obj, uap->val, ts, 1, 0);
3113 }
3114
3115 static int
3116 __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3117 {
3118 struct timespec *ts, timeout;
3119 int error;
3120
3121 /* Allow a null timespec (wait forever). */
3122 if (uap->uaddr2 == NULL)
3123 ts = NULL;
3124 else {
3125 error = copyin_timeout32(uap->uaddr2, &timeout);
3126 if (error != 0)
3127 return (error);
3128 if (timeout.tv_nsec >= 1000000000 ||
3129 timeout.tv_nsec < 0)
3130 return (EINVAL);
3131 ts = &timeout;
3132 }
3133 return do_lock_umutex(td, uap->obj, ts, 0);
3134 }
3135
3136 static int
3137 __umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
3138 {
3139 struct timespec *ts, timeout;
3140 int error;
3141
3142 /* Allow a null timespec (wait forever). */
3143 if (uap->uaddr2 == NULL)
3144 ts = NULL;
3145 else {
3146 error = copyin_timeout32(uap->uaddr2, &timeout);
3147 if (error != 0)
3148 return (error);
3149 if (timeout.tv_nsec >= 1000000000 ||
3150 timeout.tv_nsec < 0)
3151 return (EINVAL);
3152 ts = &timeout;
3153 }
3154 return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
3155 }
3156
3157 static int
3158 __umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
3159 {
3160 struct timespec *ts, timeout;
3161 int error;
3162
3163 /* Allow a null timespec (wait forever). */
3164 if (uap->uaddr2 == NULL)
3165 ts = NULL;
3166 else {
3167 error = copyin_timeout32(uap->uaddr2, &timeout);
3168 if (error != 0)
3169 return (error);
3170 if (timeout.tv_nsec >= 1000000000 ||
3171 timeout.tv_nsec < 0)
3172 return (EINVAL);
3173 ts = &timeout;
3174 }
3175 return (do_cv_wait(td, uap->obj, uap->uaddr1, ts, uap->val));
3176 }
3177
3178 static int
3179 __umtx_op_rw_rdlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3180 {
3181 struct timespec timeout;
3182 int error;
3183
3184 /* Allow a null timespec (wait forever). */
3185 if (uap->uaddr2 == NULL) {
3186 error = do_rw_rdlock(td, uap->obj, uap->val, 0);
3187 } else {
3188 error = copyin_timeout32(uap->uaddr2, &timeout);
3190 if (error != 0)
3191 return (error);
3192 if (timeout.tv_nsec >= 1000000000 ||
3193 timeout.tv_nsec < 0) {
3194 return (EINVAL);
3195 }
3196 error = do_rw_rdlock2(td, uap->obj, uap->val, &timeout);
3197 }
3198 return (error);
3199 }
3200
3201 static int
3202 __umtx_op_rw_wrlock_compat32(struct thread *td, struct _umtx_op_args *uap)
3203 {
3204 struct timespec timeout;
3205 int error;
3206
3207 /* Allow a null timespec (wait forever). */
3208 if (uap->uaddr2 == NULL) {
3209 error = do_rw_wrlock(td, uap->obj, 0);
3210 } else {
3211 error = copyin_timeout32(uap->uaddr2, &timeout);
3212 if (error != 0)
3213 return (error);
3214 if (timeout.tv_nsec >= 1000000000 ||
3215 timeout.tv_nsec < 0) {
3216 return (EINVAL);
3217 }
3218
3219 error = do_rw_wrlock2(td, uap->obj, &timeout);
3220 }
3221 return (error);
3222 }
3223
3224 static int
3225 __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *uap)
3226 {
3227 struct timespec *ts, timeout;
3228 int error;
3229
3230 if (uap->uaddr2 == NULL)
3231 ts = NULL;
3232 else {
3233 error = copyin_timeout32(uap->uaddr2, &timeout);
3234 if (error != 0)
3235 return (error);
3236 if (timeout.tv_nsec >= 1000000000 ||
3237 timeout.tv_nsec < 0)
3238 return (EINVAL);
3239 ts = &timeout;
3240 }
3241 return do_wait(td, uap->obj, uap->val, ts, 1, 1);
3242 }
3243
3244 static _umtx_op_func op_table_compat32[] = {
3245 __umtx_op_lock_umtx_compat32, /* UMTX_OP_LOCK */
3246 __umtx_op_unlock_umtx_compat32, /* UMTX_OP_UNLOCK */
3247 __umtx_op_wait_compat32, /* UMTX_OP_WAIT */
3248 __umtx_op_wake, /* UMTX_OP_WAKE */
3249 __umtx_op_trylock_umutex, /* UMTX_OP_MUTEX_TRYLOCK */
3250 __umtx_op_lock_umutex_compat32, /* UMTX_OP_MUTEX_LOCK */
3251 __umtx_op_unlock_umutex, /* UMTX_OP_MUTEX_UNLOCK */
3252 __umtx_op_set_ceiling, /* UMTX_OP_SET_CEILING */
3253 __umtx_op_cv_wait_compat32, /* UMTX_OP_CV_WAIT */
3254 __umtx_op_cv_signal, /* UMTX_OP_CV_SIGNAL */
3255 __umtx_op_cv_broadcast, /* UMTX_OP_CV_BROADCAST */
3256 __umtx_op_wait_compat32, /* UMTX_OP_WAIT_UINT */
3257 __umtx_op_rw_rdlock_compat32, /* UMTX_OP_RW_RDLOCK */
3258 __umtx_op_rw_wrlock_compat32, /* UMTX_OP_RW_WRLOCK */
3259 __umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */
3260 __umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */
3261 __umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */
3262 __umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
3263 __umtx_op_wake_umutex /* UMTX_OP_UMUTEX_WAKE */
3264 };
3265
3266 int
3267 freebsd32_umtx_op(struct thread *td, struct freebsd32_umtx_op_args *uap)
3268 {
3269 if ((unsigned)uap->op < UMTX_OP_MAX)
3270 return (*op_table_compat32[uap->op])(td,
3271 (struct _umtx_op_args *)uap);
3272 return (EINVAL);
3273 }
3274 #endif
3275
3276 void
3277 umtx_thread_init(struct thread *td)
3278 {
3279 td->td_umtxq = umtxq_alloc();
3280 td->td_umtxq->uq_thread = td;
3281 }
3282
3283 void
3284 umtx_thread_fini(struct thread *td)
3285 {
3286 umtxq_free(td->td_umtxq);
3287 }
3288
3289 /*
3290 * Called when a new thread is created, e.g. by fork().
3291 */
3292 void
3293 umtx_thread_alloc(struct thread *td)
3294 {
3295 struct umtx_q *uq;
3296
3297 uq = td->td_umtxq;
3298 uq->uq_inherited_pri = PRI_MAX;
3299
3300 KASSERT(uq->uq_flags == 0, ("uq_flags != 0"));
3301 KASSERT(uq->uq_thread == td, ("uq_thread != td"));
3302 KASSERT(uq->uq_pi_blocked == NULL, ("uq_pi_blocked != NULL"));
3303 KASSERT(TAILQ_EMPTY(&uq->uq_pi_contested), ("uq_pi_contested is not empty"));
3304 }
3305
3306 /*
3307 * exec() hook.
3308 */
3309 static void
3310 umtx_exec_hook(void *arg __unused, struct proc *p __unused,
3311 struct image_params *imgp __unused)
3312 {
3313 umtx_thread_cleanup(curthread);
3314 }
3315
3316 /*
3317 * thread_exit() hook.
3318 */
3319 void
3320 umtx_thread_exit(struct thread *td)
3321 {
3322 umtx_thread_cleanup(td);
3323 }
3324
3325 /*
3326 * Clean up umtx data.
3327 */
3328 static void
3329 umtx_thread_cleanup(struct thread *td)
3330 {
3331 struct umtx_q *uq;
3332 struct umtx_pi *pi;
3333
3334 if ((uq = td->td_umtxq) == NULL)
3335 return;
3336
3337 mtx_lock_spin(&umtx_lock);
3338 uq->uq_inherited_pri = PRI_MAX;
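/*
 * Disown any PI mutexes still owned and contested by this thread
 * so their waiters stop inheriting from an exiting thread.
 */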
3339 while ((pi = TAILQ_FIRST(&uq->uq_pi_contested)) != NULL) {
3340 pi->pi_owner = NULL;
3341 TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link);
3342 }
3343 thread_lock(td);
3344 td->td_flags &= ~TDF_UBORROWING;
3345 thread_unlock(td);
3346 mtx_unlock_spin(&umtx_lock);
3347 }